diff --git "a/graph.pbtxt" "b/graph.pbtxt" new file mode 100644--- /dev/null +++ "b/graph.pbtxt" @@ -0,0 +1,590792 @@ +node { + name: "global_step/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "global_step" + op: "VarHandleOp" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "global_step" + } + } +} +node { + name: "global_step/IsInitialized/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/Assign" + op: "AssignVariableOp" + input: "global_step" + input: "global_step/Initializer/zeros" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Switch" + op: "Switch" + input: "global_step/VarIsInitializedOp" + input: "global_step/VarIsInitializedOp" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/switch_t" + op: "Identity" + input: "global_step/cond/Switch:1" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/switch_f" + op: "Identity" + input: "global_step/cond/Switch" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/pred_id" + op: "Identity" + input: "global_step/VarIsInitializedOp" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step/cond/Read/ReadVariableOp/Switch:1" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/cond/Read/ReadVariableOp/Switch" + op: "Switch" + input: "global_step" + input: "global_step/cond/pred_id" + attr { + key: "T" + value { + type: DT_RESOURCE + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/Identity" + op: "Identity" + input: "global_step/cond/Read/ReadVariableOp" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Switch_1" + op: "Switch" + input: "global_step/Initializer/zeros" + input: "global_step/cond/pred_id" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/Merge" + op: "Merge" + input: "global_step/cond/Switch_1" + input: "global_step/cond/Identity" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "global_step/add" + op: "Add" + input: "global_step/cond/Merge" + input: "global_step/add/y" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 15 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 15 + } + } + string_val: "../data/tf_records/discharge_summary.tfrecord" + string_val: "../data/tf_records/physician.tfrecord" + string_val: "../data/tf_records/nursing.tfrecord" + string_val: "../data/tf_records/nursing_other.tfrecord" + string_val: "../data/tf_records/radiology.tfrecord" + string_val: "../data/tf_records/general.tfrecord" + string_val: "../data/tf_records/respiratory.tfrecord" + string_val: "../data/tf_records/consult.tfrecord" + string_val: "../data/tf_records/nutrition.tfrecord" + string_val: "../data/tf_records/case_management.tfrecord" + string_val: "../data/tf_records/pharmacy.tfrecord" + string_val: "../data/tf_records/rehab_services.tfrecord" + string_val: "../data/tf_records/social_work.tfrecord" + string_val: "../data/tf_records/ecg.tfrecord" + string_val: "../data/tf_records/echo.tfrecord" + } + } + } +} +node { + name: "count" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: -1 + } + } + } +} +node { + name: "buffer_size" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 15 + } + } + } +} +node { + name: "seed" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "seed2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "cycle_length" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 4 + } + } + } +} +node { + name: "block_length" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } +} +node { + name: "sloppy" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: true + } + } + } +} +node { + name: "buffer_output_elements" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } +} +node { + name: "prefetch_input_elements" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 8 + } + } + } +} +node { + name: "buffer_size_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 100 + } + } + } +} +node { + name: "seed_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "seed2_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "batch_size" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 32 + } + } + } +} +node { + name: "num_parallel_calls" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 128 + } + } + } +} +node { + name: "drop_remainder" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: true + } + } + } +} +node { + name: "IteratorV2" + op: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "Const" + attr { + key: "Toutput_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "RepeatDataset" + op: "RepeatDataset" + input: "TensorSliceDataset" + input: "count" + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } +} +node { + name: "ShuffleDataset" + op: "ShuffleDataset" + input: "RepeatDataset" + input: "buffer_size" + input: "seed" + input: "seed2" + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } +} +node { + name: "ParallelInterleaveDataset" + op: "ParallelInterleaveDataset" + input: "ShuffleDataset" + input: "cycle_length" + input: "block_length" + input: "sloppy" + input: "buffer_output_elements" + input: "prefetch_input_elements" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "f" + value { + func { + name: "tf_data_structured_function_wrapper_rn3e6kArW78" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } +} +node { + name: "ShuffleDataset_1" + op: "ShuffleDataset" + input: "ParallelInterleaveDataset" + input: "buffer_size_1" + input: "seed_1" + input: "seed2_1" + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } +} +node { + name: "MapAndBatchDatasetV2" + op: "MapAndBatchDatasetV2" + input: "ShuffleDataset_1" + input: "batch_size" + input: "num_parallel_calls" + input: "drop_remainder" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "f" + value { + func { + name: "tf_data_structured_function_wrapper_9z4XDFOWUdQ" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "MakeIterator" + op: "MakeIterator" + input: "MapAndBatchDatasetV2" + input: "IteratorV2" + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } +} +node { + name: "IteratorToStringHandle" + op: "IteratorToStringHandle" + input: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "IteratorGetNext" + op: "IteratorGetNext" + input: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "bert/embeddings/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "bert/embeddings/ExpandDims" + op: "ExpandDims" + input: "IteratorGetNext" + input: "bert/embeddings/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "Dq\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/read" + op: "Identity" + input: "bert/embeddings/word_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/embedding_lookup/axis" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "bert/embeddings/embedding_lookup" + op: "GatherV2" + input: "bert/embeddings/word_embeddings/read" + input: "bert/embeddings/ExpandDims" + input: "bert/embeddings/embedding_lookup/axis" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tparams" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/embedding_lookup/Identity" + op: "Identity" + input: "bert/embeddings/embedding_lookup" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape" + op: "Reshape" + input: "bert/embeddings/embedding_lookup/Identity" + input: "bert/embeddings/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "bert/embeddings/Reshape_1" + op: "Reshape" + input: "IteratorGetNext:6" + input: "bert/embeddings/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } +} +node { + name: "bert/embeddings/one_hot/on_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/one_hot/off_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/one_hot/depth" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/one_hot" + op: "OneHot" + input: "bert/embeddings/Reshape_1" + input: "bert/embeddings/one_hot/depth" + input: "bert/embeddings/one_hot/on_value" + input: "bert/embeddings/one_hot/off_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "TI" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "axis" + value { + i: -1 + } + } +} +node { + name: "bert/embeddings/MatMul" + op: "MatMul" + input: "bert/embeddings/one_hot" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/embeddings/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape_2" + op: "Reshape" + input: "bert/embeddings/MatMul" + input: "bert/embeddings/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/add" + op: "Add" + input: "bert/embeddings/Reshape" + input: "bert/embeddings/Reshape_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 128 + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 512 + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/LessEqual" + op: "LessEqual" + input: "bert/embeddings/assert_less_equal/x" + input: "bert/embeddings/assert_less_equal/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/All" + op: "All" + input: "bert/embeddings/assert_less_equal/LessEqual" + input: "bert/embeddings/assert_less_equal/Const" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Condition x <= y did not hold element-wise:x (bert/embeddings/assert_less_equal/x:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "y (bert/embeddings/assert_less_equal/y:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Condition x <= y did not hold element-wise:x (bert/embeddings/assert_less_equal/x:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "y (bert/embeddings/assert_less_equal/y:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert" + op: "Assert" + input: "bert/embeddings/assert_less_equal/All" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_0" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_1" + input: "bert/embeddings/assert_less_equal/x" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_3" + input: "bert/embeddings/assert_less_equal/y" + attr { + key: "T" + value { + list { + type: DT_STRING + type: DT_STRING + type: DT_INT32 + type: DT_STRING + type: DT_INT32 + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/read" + op: "Identity" + input: "bert/embeddings/position_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Slice/begin" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/Slice/size" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\377\377\377\377" + } + } + } +} +node { + name: "bert/embeddings/Slice" + op: "Slice" + input: "bert/embeddings/position_embeddings/read" + input: "bert/embeddings/Slice/begin" + input: "bert/embeddings/Slice/size" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape_3/shape" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape_3" + op: "Reshape" + input: "bert/embeddings/Slice" + input: "bert/embeddings/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/add_1" + op: "Add" + input: "bert/embeddings/add" + input: "bert/embeddings/Reshape_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "bert/embeddings/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "bert/embeddings/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/mean" + op: "Mean" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/embeddings/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/variance" + op: "Mean" + input: "bert/embeddings/LayerNorm/moments/SquaredDifference" + input: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/embeddings/LayerNorm/moments/variance" + input: "bert/embeddings/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/embeddings/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "bert/embeddings/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/embeddings/LayerNorm/moments/mean" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/embeddings/LayerNorm/beta/read" + input: "bert/embeddings/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/embeddings/LayerNorm/batchnorm/mul_1" + input: "bert/embeddings/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/embeddings/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/embeddings/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/sub" + op: "Sub" + input: "bert/embeddings/dropout/random_uniform/max" + input: "bert/embeddings/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform/mul" + op: "Mul" + input: "bert/embeddings/dropout/random_uniform/RandomUniform" + input: "bert/embeddings/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/random_uniform" + op: "Add" + input: "bert/embeddings/dropout/random_uniform/mul" + input: "bert/embeddings/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/add" + op: "Add" + input: "bert/embeddings/dropout/keep_prob" + input: "bert/embeddings/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/Floor" + op: "Floor" + input: "bert/embeddings/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/div" + op: "RealDiv" + input: "bert/embeddings/LayerNorm/batchnorm/add_1" + input: "bert/embeddings/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/dropout/mul" + op: "Mul" + input: "bert/embeddings/dropout/div" + input: "bert/embeddings/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/Cast" + op: "Cast" + input: "bert/encoder/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/ones" + op: "Fill" + input: "bert/encoder/ones/shape_as_tensor" + input: "bert/encoder/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/mul" + op: "Mul" + input: "bert/encoder/ones" + input: "bert/encoder/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\377\377\377\377\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_1" + op: "Reshape" + input: "bert/embeddings/dropout/mul" + input: "bert/encoder/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "bert/encoder/layer_0/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "bert/encoder/layer_0/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "bert/encoder/layer_0/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/self/query/MatMul" + input: "bert/encoder/layer_0/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "bert/encoder/layer_0/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "bert/encoder/layer_0/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "bert/encoder/layer_0/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/self/key/MatMul" + input: "bert/encoder/layer_0/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "bert/encoder/layer_0/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "bert/encoder/layer_0/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "bert/encoder/layer_0/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/self/value/MatMul" + input: "bert/encoder/layer_0/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/query/BiasAdd" + input: "bert/encoder/layer_0/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/Reshape" + input: "bert/encoder/layer_0/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/key/BiasAdd" + input: "bert/encoder/layer_0/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/Reshape_1" + input: "bert/encoder/layer_0/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_0/attention/self/transpose" + input: "bert/encoder/layer_0/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/MatMul" + input: "bert/encoder/layer_0/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_0/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/sub/x" + input: "bert/encoder/layer_0/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/sub" + input: "bert/encoder/layer_0/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/add" + op: "Add" + input: "bert/encoder/layer_0/attention/self/Mul" + input: "bert/encoder/layer_0/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_0/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_0/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_0/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_0/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_0/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_0/attention/self/Softmax" + input: "bert/encoder/layer_0/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/dropout/div" + input: "bert/encoder/layer_0/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/value/BiasAdd" + input: "bert/encoder/layer_0/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/Reshape_2" + input: "bert/encoder/layer_0/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_0/attention/self/dropout/mul" + input: "bert/encoder/layer_0/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_0/attention/self/MatMul_1" + input: "bert/encoder/layer_0/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_0/attention/self/transpose_3" + input: "bert/encoder/layer_0/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "bert/encoder/layer_0/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "bert/encoder/layer_0/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/attention/self/Reshape_3" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/attention/output/dense/MatMul" + input: "bert/encoder/layer_0/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_0/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_0/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_0/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_0/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_0/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/dropout/div" + input: "bert/encoder/layer_0/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/dropout/mul" + input: "bert/encoder/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "bert/encoder/layer_0/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "bert/encoder/layer_0/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/intermediate/dense/MatMul" + input: "bert/encoder/layer_0/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_0/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_0/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_0/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_0/intermediate/dense/add/x" + input: "bert/encoder/layer_0/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul/x" + input: "bert/encoder/layer_0/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_0/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "bert/encoder/layer_0/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "bert/encoder/layer_0/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/intermediate/dense/mul_1" + input: "bert/encoder/layer_0/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_0/output/dense/MatMul" + input: "bert/encoder/layer_0/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_0/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_0/output/dropout/random_uniform/max" + input: "bert/encoder/layer_0/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_0/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_0/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_0/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_0/output/dropout/keep_prob" + input: "bert/encoder/layer_0/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_0/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_0/output/dense/BiasAdd" + input: "bert/encoder/layer_0/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/dropout/div" + input: "bert/encoder/layer_0/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/add" + op: "Add" + input: "bert/encoder/layer_0/output/dropout/mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "bert/encoder/layer_0/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_0/output/LayerNorm/beta/read" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "bert/encoder/layer_1/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "bert/encoder/layer_1/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/self/query/MatMul" + input: "bert/encoder/layer_1/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "bert/encoder/layer_1/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "bert/encoder/layer_1/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/self/key/MatMul" + input: "bert/encoder/layer_1/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "bert/encoder/layer_1/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "bert/encoder/layer_1/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/self/value/MatMul" + input: "bert/encoder/layer_1/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_1/attention/self/query/BiasAdd" + input: "bert/encoder/layer_1/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/Reshape" + input: "bert/encoder/layer_1/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_1/attention/self/key/BiasAdd" + input: "bert/encoder/layer_1/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/Reshape_1" + input: "bert/encoder/layer_1/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_1/attention/self/transpose" + input: "bert/encoder/layer_1/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/MatMul" + input: "bert/encoder/layer_1/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_1/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/sub/x" + input: "bert/encoder/layer_1/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/sub" + input: "bert/encoder/layer_1/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/add" + op: "Add" + input: "bert/encoder/layer_1/attention/self/Mul" + input: "bert/encoder/layer_1/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_1/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_1/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_1/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_1/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_1/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_1/attention/self/Softmax" + input: "bert/encoder/layer_1/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/dropout/div" + input: "bert/encoder/layer_1/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_1/attention/self/value/BiasAdd" + input: "bert/encoder/layer_1/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/Reshape_2" + input: "bert/encoder/layer_1/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_1/attention/self/dropout/mul" + input: "bert/encoder/layer_1/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_1/attention/self/MatMul_1" + input: "bert/encoder/layer_1/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_1/attention/self/transpose_3" + input: "bert/encoder/layer_1/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "bert/encoder/layer_1/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "bert/encoder/layer_1/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/attention/self/Reshape_3" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/attention/output/dense/MatMul" + input: "bert/encoder/layer_1/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_1/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_1/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_1/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_1/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_1/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_1/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/dropout/div" + input: "bert/encoder/layer_1/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/add" + op: "Add" + input: "bert/encoder/layer_1/attention/output/dropout/mul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "bert/encoder/layer_1/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "bert/encoder/layer_1/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/intermediate/dense/MatMul" + input: "bert/encoder/layer_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_1/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_1/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_1/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_1/intermediate/dense/add/x" + input: "bert/encoder/layer_1/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul/x" + input: "bert/encoder/layer_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_1/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "bert/encoder/layer_1/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "bert/encoder/layer_1/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/intermediate/dense/mul_1" + input: "bert/encoder/layer_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_1/output/dense/MatMul" + input: "bert/encoder/layer_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_1/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_1/output/dropout/random_uniform/max" + input: "bert/encoder/layer_1/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_1/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_1/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_1/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_1/output/dropout/keep_prob" + input: "bert/encoder/layer_1/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_1/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_1/output/dense/BiasAdd" + input: "bert/encoder/layer_1/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/dropout/div" + input: "bert/encoder/layer_1/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/add" + op: "Add" + input: "bert/encoder/layer_1/output/dropout/mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "bert/encoder/layer_1/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_1/output/LayerNorm/beta/read" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "bert/encoder/layer_2/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "bert/encoder/layer_2/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/attention/self/query/MatMul" + input: "bert/encoder/layer_2/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "bert/encoder/layer_2/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "bert/encoder/layer_2/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/attention/self/key/MatMul" + input: "bert/encoder/layer_2/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "bert/encoder/layer_2/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "bert/encoder/layer_2/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/attention/self/value/MatMul" + input: "bert/encoder/layer_2/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/query/BiasAdd" + input: "bert/encoder/layer_2/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/Reshape" + input: "bert/encoder/layer_2/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/key/BiasAdd" + input: "bert/encoder/layer_2/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/Reshape_1" + input: "bert/encoder/layer_2/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_2/attention/self/transpose" + input: "bert/encoder/layer_2/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/MatMul" + input: "bert/encoder/layer_2/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_2/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/sub/x" + input: "bert/encoder/layer_2/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/sub" + input: "bert/encoder/layer_2/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/add" + op: "Add" + input: "bert/encoder/layer_2/attention/self/Mul" + input: "bert/encoder/layer_2/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_2/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_2/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_2/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_2/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_2/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_2/attention/self/Softmax" + input: "bert/encoder/layer_2/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/dropout/div" + input: "bert/encoder/layer_2/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/value/BiasAdd" + input: "bert/encoder/layer_2/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/Reshape_2" + input: "bert/encoder/layer_2/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_2/attention/self/dropout/mul" + input: "bert/encoder/layer_2/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_2/attention/self/MatMul_1" + input: "bert/encoder/layer_2/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_2/attention/self/transpose_3" + input: "bert/encoder/layer_2/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "bert/encoder/layer_2/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "bert/encoder/layer_2/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/attention/self/Reshape_3" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/attention/output/dense/MatMul" + input: "bert/encoder/layer_2/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_2/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_2/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_2/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_2/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_2/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/dropout/div" + input: "bert/encoder/layer_2/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/dropout/mul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "bert/encoder/layer_2/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "bert/encoder/layer_2/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/intermediate/dense/MatMul" + input: "bert/encoder/layer_2/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_2/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_2/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_2/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_2/intermediate/dense/add/x" + input: "bert/encoder/layer_2/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul/x" + input: "bert/encoder/layer_2/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_2/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "bert/encoder/layer_2/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "bert/encoder/layer_2/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/intermediate/dense/mul_1" + input: "bert/encoder/layer_2/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_2/output/dense/MatMul" + input: "bert/encoder/layer_2/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_2/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_2/output/dropout/random_uniform/max" + input: "bert/encoder/layer_2/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_2/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_2/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_2/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_2/output/dropout/keep_prob" + input: "bert/encoder/layer_2/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_2/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_2/output/dense/BiasAdd" + input: "bert/encoder/layer_2/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/dropout/div" + input: "bert/encoder/layer_2/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/add" + op: "Add" + input: "bert/encoder/layer_2/output/dropout/mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "bert/encoder/layer_2/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_2/output/LayerNorm/beta/read" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "bert/encoder/layer_3/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "bert/encoder/layer_3/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/self/query/MatMul" + input: "bert/encoder/layer_3/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "bert/encoder/layer_3/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "bert/encoder/layer_3/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/self/key/MatMul" + input: "bert/encoder/layer_3/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "bert/encoder/layer_3/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "bert/encoder/layer_3/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/self/value/MatMul" + input: "bert/encoder/layer_3/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/query/BiasAdd" + input: "bert/encoder/layer_3/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_3/attention/self/Reshape" + input: "bert/encoder/layer_3/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/key/BiasAdd" + input: "bert/encoder/layer_3/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_3/attention/self/Reshape_1" + input: "bert/encoder/layer_3/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_3/attention/self/transpose" + input: "bert/encoder/layer_3/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/MatMul" + input: "bert/encoder/layer_3/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_3/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/sub/x" + input: "bert/encoder/layer_3/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/sub" + input: "bert/encoder/layer_3/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/add" + op: "Add" + input: "bert/encoder/layer_3/attention/self/Mul" + input: "bert/encoder/layer_3/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_3/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_3/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_3/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_3/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_3/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_3/attention/self/Softmax" + input: "bert/encoder/layer_3/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/dropout/div" + input: "bert/encoder/layer_3/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/value/BiasAdd" + input: "bert/encoder/layer_3/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_3/attention/self/Reshape_2" + input: "bert/encoder/layer_3/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_3/attention/self/dropout/mul" + input: "bert/encoder/layer_3/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_3/attention/self/MatMul_1" + input: "bert/encoder/layer_3/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_3/attention/self/transpose_3" + input: "bert/encoder/layer_3/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "bert/encoder/layer_3/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "bert/encoder/layer_3/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/attention/self/Reshape_3" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/attention/output/dense/MatMul" + input: "bert/encoder/layer_3/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_3/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_3/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_3/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_3/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_3/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/dropout/div" + input: "bert/encoder/layer_3/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/dropout/mul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "bert/encoder/layer_3/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "bert/encoder/layer_3/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/intermediate/dense/MatMul" + input: "bert/encoder/layer_3/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_3/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_3/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_3/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_3/intermediate/dense/add/x" + input: "bert/encoder/layer_3/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul/x" + input: "bert/encoder/layer_3/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_3/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "bert/encoder/layer_3/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "bert/encoder/layer_3/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/intermediate/dense/mul_1" + input: "bert/encoder/layer_3/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_3/output/dense/MatMul" + input: "bert/encoder/layer_3/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_3/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_3/output/dropout/random_uniform/max" + input: "bert/encoder/layer_3/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_3/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_3/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_3/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_3/output/dropout/keep_prob" + input: "bert/encoder/layer_3/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_3/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_3/output/dense/BiasAdd" + input: "bert/encoder/layer_3/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/dropout/div" + input: "bert/encoder/layer_3/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/add" + op: "Add" + input: "bert/encoder/layer_3/output/dropout/mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "bert/encoder/layer_3/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_3/output/LayerNorm/beta/read" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "bert/encoder/layer_4/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "bert/encoder/layer_4/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/self/query/MatMul" + input: "bert/encoder/layer_4/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "bert/encoder/layer_4/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "bert/encoder/layer_4/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_4/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/self/key/MatMul" + input: "bert/encoder/layer_4/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "bert/encoder/layer_4/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "bert/encoder/layer_4/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/self/value/MatMul" + input: "bert/encoder/layer_4/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/query/BiasAdd" + input: "bert/encoder/layer_4/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/Reshape" + input: "bert/encoder/layer_4/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/key/BiasAdd" + input: "bert/encoder/layer_4/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/Reshape_1" + input: "bert/encoder/layer_4/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_4/attention/self/transpose" + input: "bert/encoder/layer_4/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/MatMul" + input: "bert/encoder/layer_4/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_4/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/sub/x" + input: "bert/encoder/layer_4/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/sub" + input: "bert/encoder/layer_4/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/add" + op: "Add" + input: "bert/encoder/layer_4/attention/self/Mul" + input: "bert/encoder/layer_4/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_4/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_4/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_4/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_4/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_4/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_4/attention/self/Softmax" + input: "bert/encoder/layer_4/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/dropout/div" + input: "bert/encoder/layer_4/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/value/BiasAdd" + input: "bert/encoder/layer_4/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/Reshape_2" + input: "bert/encoder/layer_4/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_4/attention/self/dropout/mul" + input: "bert/encoder/layer_4/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_4/attention/self/MatMul_1" + input: "bert/encoder/layer_4/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_4/attention/self/transpose_3" + input: "bert/encoder/layer_4/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "bert/encoder/layer_4/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "bert/encoder/layer_4/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/attention/self/Reshape_3" + input: "bert/encoder/layer_4/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/attention/output/dense/MatMul" + input: "bert/encoder/layer_4/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_4/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_4/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_4/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_4/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_4/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/dropout/div" + input: "bert/encoder/layer_4/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/dropout/mul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "bert/encoder/layer_4/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "bert/encoder/layer_4/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/intermediate/dense/MatMul" + input: "bert/encoder/layer_4/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_4/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_4/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_4/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_4/intermediate/dense/add/x" + input: "bert/encoder/layer_4/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul/x" + input: "bert/encoder/layer_4/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_4/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "bert/encoder/layer_4/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "bert/encoder/layer_4/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/intermediate/dense/mul_1" + input: "bert/encoder/layer_4/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_4/output/dense/MatMul" + input: "bert/encoder/layer_4/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_4/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_4/output/dropout/random_uniform/max" + input: "bert/encoder/layer_4/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_4/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_4/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_4/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_4/output/dropout/keep_prob" + input: "bert/encoder/layer_4/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_4/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_4/output/dense/BiasAdd" + input: "bert/encoder/layer_4/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/dropout/div" + input: "bert/encoder/layer_4/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/add" + op: "Add" + input: "bert/encoder/layer_4/output/dropout/mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "bert/encoder/layer_4/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_4/output/LayerNorm/beta/read" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "bert/encoder/layer_5/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "bert/encoder/layer_5/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/self/query/MatMul" + input: "bert/encoder/layer_5/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "bert/encoder/layer_5/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "bert/encoder/layer_5/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/self/key/MatMul" + input: "bert/encoder/layer_5/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "bert/encoder/layer_5/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "bert/encoder/layer_5/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/self/value/MatMul" + input: "bert/encoder/layer_5/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/query/BiasAdd" + input: "bert/encoder/layer_5/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/Reshape" + input: "bert/encoder/layer_5/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/key/BiasAdd" + input: "bert/encoder/layer_5/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/Reshape_1" + input: "bert/encoder/layer_5/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_5/attention/self/transpose" + input: "bert/encoder/layer_5/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/MatMul" + input: "bert/encoder/layer_5/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_5/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/sub/x" + input: "bert/encoder/layer_5/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/sub" + input: "bert/encoder/layer_5/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/add" + op: "Add" + input: "bert/encoder/layer_5/attention/self/Mul" + input: "bert/encoder/layer_5/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_5/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_5/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_5/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_5/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_5/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_5/attention/self/Softmax" + input: "bert/encoder/layer_5/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/dropout/div" + input: "bert/encoder/layer_5/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/value/BiasAdd" + input: "bert/encoder/layer_5/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/Reshape_2" + input: "bert/encoder/layer_5/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_5/attention/self/dropout/mul" + input: "bert/encoder/layer_5/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_5/attention/self/MatMul_1" + input: "bert/encoder/layer_5/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_5/attention/self/transpose_3" + input: "bert/encoder/layer_5/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "bert/encoder/layer_5/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "bert/encoder/layer_5/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/attention/self/Reshape_3" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/attention/output/dense/MatMul" + input: "bert/encoder/layer_5/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_5/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_5/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_5/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_5/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_5/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/dropout/div" + input: "bert/encoder/layer_5/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/dropout/mul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "bert/encoder/layer_5/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "bert/encoder/layer_5/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/intermediate/dense/MatMul" + input: "bert/encoder/layer_5/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_5/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_5/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_5/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_5/intermediate/dense/add/x" + input: "bert/encoder/layer_5/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul/x" + input: "bert/encoder/layer_5/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_5/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "bert/encoder/layer_5/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "bert/encoder/layer_5/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/intermediate/dense/mul_1" + input: "bert/encoder/layer_5/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_5/output/dense/MatMul" + input: "bert/encoder/layer_5/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_5/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_5/output/dropout/random_uniform/max" + input: "bert/encoder/layer_5/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_5/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_5/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_5/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_5/output/dropout/keep_prob" + input: "bert/encoder/layer_5/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_5/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_5/output/dense/BiasAdd" + input: "bert/encoder/layer_5/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/dropout/div" + input: "bert/encoder/layer_5/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/add" + op: "Add" + input: "bert/encoder/layer_5/output/dropout/mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "bert/encoder/layer_5/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_5/output/LayerNorm/beta/read" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "bert/encoder/layer_6/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "bert/encoder/layer_6/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/self/query/MatMul" + input: "bert/encoder/layer_6/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "bert/encoder/layer_6/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "bert/encoder/layer_6/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/self/key/MatMul" + input: "bert/encoder/layer_6/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "bert/encoder/layer_6/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "bert/encoder/layer_6/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/self/value/MatMul" + input: "bert/encoder/layer_6/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/query/BiasAdd" + input: "bert/encoder/layer_6/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/Reshape" + input: "bert/encoder/layer_6/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/key/BiasAdd" + input: "bert/encoder/layer_6/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/Reshape_1" + input: "bert/encoder/layer_6/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_6/attention/self/transpose" + input: "bert/encoder/layer_6/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/MatMul" + input: "bert/encoder/layer_6/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_6/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/sub/x" + input: "bert/encoder/layer_6/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/sub" + input: "bert/encoder/layer_6/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/add" + op: "Add" + input: "bert/encoder/layer_6/attention/self/Mul" + input: "bert/encoder/layer_6/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_6/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_6/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_6/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_6/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_6/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_6/attention/self/Softmax" + input: "bert/encoder/layer_6/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/dropout/div" + input: "bert/encoder/layer_6/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/value/BiasAdd" + input: "bert/encoder/layer_6/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/Reshape_2" + input: "bert/encoder/layer_6/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_6/attention/self/dropout/mul" + input: "bert/encoder/layer_6/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_6/attention/self/MatMul_1" + input: "bert/encoder/layer_6/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_6/attention/self/transpose_3" + input: "bert/encoder/layer_6/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "bert/encoder/layer_6/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "bert/encoder/layer_6/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/attention/self/Reshape_3" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/attention/output/dense/MatMul" + input: "bert/encoder/layer_6/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_6/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_6/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_6/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_6/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_6/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/dropout/div" + input: "bert/encoder/layer_6/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/dropout/mul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "bert/encoder/layer_6/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "bert/encoder/layer_6/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/intermediate/dense/MatMul" + input: "bert/encoder/layer_6/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_6/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_6/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_6/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_6/intermediate/dense/add/x" + input: "bert/encoder/layer_6/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul/x" + input: "bert/encoder/layer_6/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_6/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "bert/encoder/layer_6/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "bert/encoder/layer_6/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/intermediate/dense/mul_1" + input: "bert/encoder/layer_6/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_6/output/dense/MatMul" + input: "bert/encoder/layer_6/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_6/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_6/output/dropout/random_uniform/max" + input: "bert/encoder/layer_6/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_6/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_6/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_6/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_6/output/dropout/keep_prob" + input: "bert/encoder/layer_6/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_6/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_6/output/dense/BiasAdd" + input: "bert/encoder/layer_6/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/dropout/div" + input: "bert/encoder/layer_6/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/add" + op: "Add" + input: "bert/encoder/layer_6/output/dropout/mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "bert/encoder/layer_6/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_6/output/LayerNorm/beta/read" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "bert/encoder/layer_7/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "bert/encoder/layer_7/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/self/query/MatMul" + input: "bert/encoder/layer_7/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "bert/encoder/layer_7/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "bert/encoder/layer_7/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/self/key/MatMul" + input: "bert/encoder/layer_7/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "bert/encoder/layer_7/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "bert/encoder/layer_7/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/self/value/MatMul" + input: "bert/encoder/layer_7/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_7/attention/self/query/BiasAdd" + input: "bert/encoder/layer_7/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/Reshape" + input: "bert/encoder/layer_7/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_7/attention/self/key/BiasAdd" + input: "bert/encoder/layer_7/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/Reshape_1" + input: "bert/encoder/layer_7/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_7/attention/self/transpose" + input: "bert/encoder/layer_7/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/MatMul" + input: "bert/encoder/layer_7/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_7/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/sub/x" + input: "bert/encoder/layer_7/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/sub" + input: "bert/encoder/layer_7/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/add" + op: "Add" + input: "bert/encoder/layer_7/attention/self/Mul" + input: "bert/encoder/layer_7/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_7/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_7/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_7/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_7/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_7/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_7/attention/self/Softmax" + input: "bert/encoder/layer_7/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/dropout/div" + input: "bert/encoder/layer_7/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_7/attention/self/value/BiasAdd" + input: "bert/encoder/layer_7/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/Reshape_2" + input: "bert/encoder/layer_7/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_7/attention/self/dropout/mul" + input: "bert/encoder/layer_7/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_7/attention/self/MatMul_1" + input: "bert/encoder/layer_7/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_7/attention/self/transpose_3" + input: "bert/encoder/layer_7/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "bert/encoder/layer_7/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "bert/encoder/layer_7/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/attention/self/Reshape_3" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/attention/output/dense/MatMul" + input: "bert/encoder/layer_7/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_7/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_7/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_7/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_7/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_7/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/dropout/div" + input: "bert/encoder/layer_7/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/dropout/mul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_7/attention/output/add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_7/attention/output/add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "bert/encoder/layer_7/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "bert/encoder/layer_7/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/intermediate/dense/MatMul" + input: "bert/encoder/layer_7/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_7/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_7/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_7/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_7/intermediate/dense/add/x" + input: "bert/encoder/layer_7/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/mul/x" + input: "bert/encoder/layer_7/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_7/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "bert/encoder/layer_7/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "bert/encoder/layer_7/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/intermediate/dense/mul_1" + input: "bert/encoder/layer_7/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_7/output/dense/MatMul" + input: "bert/encoder/layer_7/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_7/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_7/output/dropout/random_uniform/max" + input: "bert/encoder/layer_7/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_7/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_7/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_7/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_7/output/dropout/keep_prob" + input: "bert/encoder/layer_7/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_7/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_7/output/dense/BiasAdd" + input: "bert/encoder/layer_7/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/dropout/div" + input: "bert/encoder/layer_7/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/add" + op: "Add" + input: "bert/encoder/layer_7/output/dropout/mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "bert/encoder/layer_7/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_7/output/LayerNorm/beta/read" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "bert/encoder/layer_8/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "bert/encoder/layer_8/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/self/query/MatMul" + input: "bert/encoder/layer_8/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "bert/encoder/layer_8/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "bert/encoder/layer_8/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/self/key/MatMul" + input: "bert/encoder/layer_8/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "bert/encoder/layer_8/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "bert/encoder/layer_8/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/self/value/MatMul" + input: "bert/encoder/layer_8/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/query/BiasAdd" + input: "bert/encoder/layer_8/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/Reshape" + input: "bert/encoder/layer_8/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/key/BiasAdd" + input: "bert/encoder/layer_8/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/Reshape_1" + input: "bert/encoder/layer_8/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_8/attention/self/transpose" + input: "bert/encoder/layer_8/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/MatMul" + input: "bert/encoder/layer_8/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_8/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/sub/x" + input: "bert/encoder/layer_8/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/sub" + input: "bert/encoder/layer_8/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/add" + op: "Add" + input: "bert/encoder/layer_8/attention/self/Mul" + input: "bert/encoder/layer_8/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_8/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_8/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_8/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_8/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_8/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_8/attention/self/Softmax" + input: "bert/encoder/layer_8/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/dropout/div" + input: "bert/encoder/layer_8/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/value/BiasAdd" + input: "bert/encoder/layer_8/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/Reshape_2" + input: "bert/encoder/layer_8/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_8/attention/self/dropout/mul" + input: "bert/encoder/layer_8/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_8/attention/self/MatMul_1" + input: "bert/encoder/layer_8/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_8/attention/self/transpose_3" + input: "bert/encoder/layer_8/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "bert/encoder/layer_8/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "bert/encoder/layer_8/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/attention/self/Reshape_3" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/attention/output/dense/MatMul" + input: "bert/encoder/layer_8/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_8/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_8/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_8/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_8/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_8/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/dropout/div" + input: "bert/encoder/layer_8/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/dropout/mul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "bert/encoder/layer_8/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "bert/encoder/layer_8/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/intermediate/dense/MatMul" + input: "bert/encoder/layer_8/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_8/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_8/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_8/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_8/intermediate/dense/add/x" + input: "bert/encoder/layer_8/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul/x" + input: "bert/encoder/layer_8/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_8/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "bert/encoder/layer_8/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias" + input: "bert/encoder/layer_8/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/intermediate/dense/mul_1" + input: "bert/encoder/layer_8/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_8/output/dense/MatMul" + input: "bert/encoder/layer_8/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_8/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_8/output/dropout/random_uniform/max" + input: "bert/encoder/layer_8/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_8/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_8/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_8/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_8/output/dropout/keep_prob" + input: "bert/encoder/layer_8/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_8/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_8/output/dense/BiasAdd" + input: "bert/encoder/layer_8/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/dropout/div" + input: "bert/encoder/layer_8/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/add" + op: "Add" + input: "bert/encoder/layer_8/output/dropout/mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "bert/encoder/layer_8/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_8/output/LayerNorm/beta/read" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "bert/encoder/layer_9/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "bert/encoder/layer_9/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/self/query/MatMul" + input: "bert/encoder/layer_9/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "bert/encoder/layer_9/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "bert/encoder/layer_9/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/self/key/MatMul" + input: "bert/encoder/layer_9/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "bert/encoder/layer_9/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "bert/encoder/layer_9/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/self/value/MatMul" + input: "bert/encoder/layer_9/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/query/BiasAdd" + input: "bert/encoder/layer_9/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/Reshape" + input: "bert/encoder/layer_9/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/key/BiasAdd" + input: "bert/encoder/layer_9/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/Reshape_1" + input: "bert/encoder/layer_9/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_9/attention/self/transpose" + input: "bert/encoder/layer_9/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/MatMul" + input: "bert/encoder/layer_9/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_9/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/sub/x" + input: "bert/encoder/layer_9/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/sub" + input: "bert/encoder/layer_9/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/add" + op: "Add" + input: "bert/encoder/layer_9/attention/self/Mul" + input: "bert/encoder/layer_9/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_9/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_9/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_9/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_9/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_9/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_9/attention/self/Softmax" + input: "bert/encoder/layer_9/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/dropout/div" + input: "bert/encoder/layer_9/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/value/BiasAdd" + input: "bert/encoder/layer_9/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/Reshape_2" + input: "bert/encoder/layer_9/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_9/attention/self/dropout/mul" + input: "bert/encoder/layer_9/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_9/attention/self/MatMul_1" + input: "bert/encoder/layer_9/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_9/attention/self/transpose_3" + input: "bert/encoder/layer_9/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "bert/encoder/layer_9/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "bert/encoder/layer_9/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/attention/self/Reshape_3" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/attention/output/dense/MatMul" + input: "bert/encoder/layer_9/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_9/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_9/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_9/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_9/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_9/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/dropout/div" + input: "bert/encoder/layer_9/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/dropout/mul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "bert/encoder/layer_9/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "bert/encoder/layer_9/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/intermediate/dense/MatMul" + input: "bert/encoder/layer_9/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_9/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_9/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_9/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_9/intermediate/dense/add/x" + input: "bert/encoder/layer_9/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul/x" + input: "bert/encoder/layer_9/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_9/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "bert/encoder/layer_9/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "bert/encoder/layer_9/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/intermediate/dense/mul_1" + input: "bert/encoder/layer_9/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_9/output/dense/MatMul" + input: "bert/encoder/layer_9/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_9/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_9/output/dropout/random_uniform/max" + input: "bert/encoder/layer_9/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_9/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_9/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_9/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_9/output/dropout/keep_prob" + input: "bert/encoder/layer_9/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_9/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_9/output/dense/BiasAdd" + input: "bert/encoder/layer_9/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/dropout/div" + input: "bert/encoder/layer_9/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/add" + op: "Add" + input: "bert/encoder/layer_9/output/dropout/mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "bert/encoder/layer_9/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_9/output/LayerNorm/beta/read" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "bert/encoder/layer_10/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "bert/encoder/layer_10/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/self/query/MatMul" + input: "bert/encoder/layer_10/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "bert/encoder/layer_10/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "bert/encoder/layer_10/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/self/key/MatMul" + input: "bert/encoder/layer_10/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "bert/encoder/layer_10/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "bert/encoder/layer_10/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/self/value/MatMul" + input: "bert/encoder/layer_10/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/query/BiasAdd" + input: "bert/encoder/layer_10/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_10/attention/self/Reshape" + input: "bert/encoder/layer_10/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/key/BiasAdd" + input: "bert/encoder/layer_10/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_10/attention/self/Reshape_1" + input: "bert/encoder/layer_10/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_10/attention/self/transpose" + input: "bert/encoder/layer_10/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/MatMul" + input: "bert/encoder/layer_10/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_10/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/sub/x" + input: "bert/encoder/layer_10/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/sub" + input: "bert/encoder/layer_10/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/add" + op: "Add" + input: "bert/encoder/layer_10/attention/self/Mul" + input: "bert/encoder/layer_10/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_10/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_10/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_10/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_10/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_10/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_10/attention/self/Softmax" + input: "bert/encoder/layer_10/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/dropout/div" + input: "bert/encoder/layer_10/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/value/BiasAdd" + input: "bert/encoder/layer_10/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_10/attention/self/Reshape_2" + input: "bert/encoder/layer_10/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_10/attention/self/dropout/mul" + input: "bert/encoder/layer_10/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_10/attention/self/MatMul_1" + input: "bert/encoder/layer_10/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_10/attention/self/transpose_3" + input: "bert/encoder/layer_10/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "bert/encoder/layer_10/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "bert/encoder/layer_10/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/attention/self/Reshape_3" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/attention/output/dense/MatMul" + input: "bert/encoder/layer_10/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_10/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_10/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_10/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_10/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_10/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/dropout/div" + input: "bert/encoder/layer_10/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/dropout/mul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "bert/encoder/layer_10/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "bert/encoder/layer_10/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/intermediate/dense/MatMul" + input: "bert/encoder/layer_10/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_10/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_10/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_10/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_10/intermediate/dense/add/x" + input: "bert/encoder/layer_10/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul/x" + input: "bert/encoder/layer_10/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_10/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "bert/encoder/layer_10/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "bert/encoder/layer_10/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/intermediate/dense/mul_1" + input: "bert/encoder/layer_10/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_10/output/dense/MatMul" + input: "bert/encoder/layer_10/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_10/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_10/output/dropout/random_uniform/max" + input: "bert/encoder/layer_10/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_10/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_10/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_10/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_10/output/dropout/keep_prob" + input: "bert/encoder/layer_10/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_10/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_10/output/dense/BiasAdd" + input: "bert/encoder/layer_10/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/dropout/div" + input: "bert/encoder/layer_10/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/add" + op: "Add" + input: "bert/encoder/layer_10/output/dropout/mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "bert/encoder/layer_10/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_10/output/LayerNorm/beta/read" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "bert/encoder/layer_11/attention/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "bert/encoder/layer_11/attention/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/self/query/MatMul" + input: "bert/encoder/layer_11/attention/self/query/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "bert/encoder/layer_11/attention/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "bert/encoder/layer_11/attention/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/self/key/MatMul" + input: "bert/encoder/layer_11/attention/self/key/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "bert/encoder/layer_11/attention/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "bert/encoder/layer_11/attention/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/MatMul" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/self/value/MatMul" + input: "bert/encoder/layer_11/attention/self/value/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/query/BiasAdd" + input: "bert/encoder/layer_11/attention/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/Reshape" + input: "bert/encoder/layer_11/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_1" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/key/BiasAdd" + input: "bert/encoder/layer_11/attention/self/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_1" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/Reshape_1" + input: "bert/encoder/layer_11/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/MatMul" + op: "BatchMatMul" + input: "bert/encoder/layer_11/attention/self/transpose" + input: "bert/encoder/layer_11/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/MatMul" + input: "bert/encoder/layer_11/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/ExpandDims" + op: "ExpandDims" + input: "bert/encoder/mul" + input: "bert/encoder/layer_11/attention/self/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/sub/x" + input: "bert/encoder/layer_11/attention/self/ExpandDims" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/sub" + input: "bert/encoder/layer_11/attention/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/add" + op: "Add" + input: "bert/encoder/layer_11/attention/self/Mul" + input: "bert/encoder/layer_11/attention/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Softmax" + op: "Softmax" + input: "bert/encoder/layer_11/attention/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_11/attention/self/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/max" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/mul" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/add" + op: "Add" + input: "bert/encoder/layer_11/attention/self/dropout/keep_prob" + input: "bert/encoder/layer_11/attention/self/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_11/attention/self/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_11/attention/self/Softmax" + input: "bert/encoder/layer_11/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/dropout/div" + input: "bert/encoder/layer_11/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/value/BiasAdd" + input: "bert/encoder/layer_11/attention/self/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_2" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/Reshape_2" + input: "bert/encoder/layer_11/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_11/attention/self/dropout/mul" + input: "bert/encoder/layer_11/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/transpose_3" + op: "Transpose" + input: "bert/encoder/layer_11/attention/self/MatMul_1" + input: "bert/encoder/layer_11/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_11/attention/self/transpose_3" + input: "bert/encoder/layer_11/attention/self/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "bert/encoder/layer_11/attention/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "bert/encoder/layer_11/attention/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_11/attention/self/Reshape_3" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/attention/output/dense/MatMul" + input: "bert/encoder/layer_11/attention/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_11/attention/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/max" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/dropout/keep_prob" + input: "bert/encoder/layer_11/attention/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_11/attention/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_11/attention/output/dense/BiasAdd" + input: "bert/encoder/layer_11/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/dropout/div" + input: "bert/encoder/layer_11/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/dropout/mul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "bert/encoder/layer_11/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "bert/encoder/layer_11/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/intermediate/dense/MatMul" + input: "bert/encoder/layer_11/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/Sqrt" + op: "Sqrt" + input: "bert/encoder/layer_11/intermediate/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/truediv" + op: "RealDiv" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_11/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/Erf" + op: "Erf" + input: "bert/encoder/layer_11/intermediate/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/add" + op: "Add" + input: "bert/encoder/layer_11/intermediate/dense/add/x" + input: "bert/encoder/layer_11/intermediate/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul/x" + input: "bert/encoder/layer_11/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + input: "bert/encoder/layer_11/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "bert/encoder/layer_11/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "bert/encoder/layer_11/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/MatMul" + op: "MatMul" + input: "bert/encoder/layer_11/intermediate/dense/mul_1" + input: "bert/encoder/layer_11/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/BiasAdd" + op: "BiasAdd" + input: "bert/encoder/layer_11/output/dense/MatMul" + input: "bert/encoder/layer_11/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/keep_prob" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/min" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/max" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/RandomUniform" + op: "RandomUniform" + input: "bert/encoder/layer_11/output/dropout/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/sub" + op: "Sub" + input: "bert/encoder/layer_11/output/dropout/random_uniform/max" + input: "bert/encoder/layer_11/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/dropout/random_uniform/RandomUniform" + input: "bert/encoder/layer_11/output/dropout/random_uniform/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/random_uniform" + op: "Add" + input: "bert/encoder/layer_11/output/dropout/random_uniform/mul" + input: "bert/encoder/layer_11/output/dropout/random_uniform/min" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/add" + op: "Add" + input: "bert/encoder/layer_11/output/dropout/keep_prob" + input: "bert/encoder/layer_11/output/dropout/random_uniform" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/Floor" + op: "Floor" + input: "bert/encoder/layer_11/output/dropout/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/div" + op: "RealDiv" + input: "bert/encoder/layer_11/output/dense/BiasAdd" + input: "bert/encoder/layer_11/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dropout/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/dropout/div" + input: "bert/encoder/layer_11/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/add" + op: "Add" + input: "bert/encoder/layer_11/output/dropout/mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "bert/encoder/layer_11/output/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/variance" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/layer_11/output/LayerNorm/beta/read" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_2" + op: "Reshape" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_3/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_3" + op: "Reshape" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_3/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_4/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_4" + op: "Reshape" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_4/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_5/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_5" + op: "Reshape" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_5/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_6/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_6" + op: "Reshape" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_6/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_7/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_7" + op: "Reshape" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_7/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_8/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_8" + op: "Reshape" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_8/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_9/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_9" + op: "Reshape" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_9/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_10/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_10" + op: "Reshape" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_10/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_11/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_11" + op: "Reshape" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_11/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_12/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_12" + op: "Reshape" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_12/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/Reshape_13/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/Reshape_13" + op: "Reshape" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1" + input: "bert/encoder/Reshape_13/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice" + op: "StridedSlice" + input: "bert/encoder/Reshape_13" + input: "bert/pooler/strided_slice/stack" + input: "bert/pooler/strided_slice/stack_1" + input: "bert/pooler/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 5 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 5 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "bert/pooler/Squeeze" + op: "Squeeze" + input: "bert/pooler/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/read" + op: "Identity" + input: "bert/pooler/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/Assign" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "bert/pooler/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/read" + op: "Identity" + input: "bert/pooler/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/MatMul" + op: "MatMul" + input: "bert/pooler/Squeeze" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/pooler/dense/BiasAdd" + op: "BiasAdd" + input: "bert/pooler/dense/MatMul" + input: "bert/pooler/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/pooler/dense/Tanh" + op: "Tanh" + input: "bert/pooler/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "range/limit" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 32 + } + } + } +} +node { + name: "range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "range" + op: "Range" + input: "range/start" + input: "range/limit" + input: "range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 128 + } + } + } +} +node { + name: "mul" + op: "Mul" + input: "range" + input: "mul/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\377\377\377\377\001\000\000\000" + } + } + } +} +node { + name: "Reshape" + op: "Reshape" + input: "mul" + input: "Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "add" + op: "Add" + input: "IteratorGetNext:3" + input: "Reshape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 20 + } + } + } + } + } +} +node { + name: "Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "Reshape_1" + op: "Reshape" + input: "add" + input: "Reshape_1/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "Reshape_2" + op: "Reshape" + input: "bert/encoder/Reshape_13" + input: "Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "GatherV2/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "GatherV2" + op: "GatherV2" + input: "Reshape_2" + input: "Reshape_1" + input: "GatherV2/axis" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tparams" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mul" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/kernel" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/read" + op: "Identity" + input: "cls/predictions/transform/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/bias/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/bias" + input: "cls/predictions/transform/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/read" + op: "Identity" + input: "cls/predictions/transform/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/MatMul" + op: "MatMul" + input: "GatherV2" + input: "cls/predictions/transform/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "cls/predictions/transform/dense/BiasAdd" + op: "BiasAdd" + input: "cls/predictions/transform/dense/MatMul" + input: "cls/predictions/transform/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "cls/predictions/transform/dense/Sqrt/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/Sqrt" + op: "Sqrt" + input: "cls/predictions/transform/dense/Sqrt/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/truediv" + op: "RealDiv" + input: "cls/predictions/transform/dense/BiasAdd" + input: "cls/predictions/transform/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/Erf" + op: "Erf" + input: "cls/predictions/transform/dense/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/add/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/add" + op: "Add" + input: "cls/predictions/transform/dense/add/x" + input: "cls/predictions/transform/dense/Erf" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul" + op: "Mul" + input: "cls/predictions/transform/dense/mul/x" + input: "cls/predictions/transform/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/BiasAdd" + input: "cls/predictions/transform/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta" + input: "cls/predictions/transform/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "cls/predictions/transform/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/mean" + op: "Mean" + input: "cls/predictions/transform/dense/mul_1" + input: "cls/predictions/transform/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "cls/predictions/transform/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "cls/predictions/transform/dense/mul_1" + input: "cls/predictions/transform/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/variance" + op: "Mean" + input: "cls/predictions/transform/LayerNorm/moments/SquaredDifference" + input: "cls/predictions/transform/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/add" + op: "Add" + input: "cls/predictions/transform/LayerNorm/moments/variance" + input: "cls/predictions/transform/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "cls/predictions/transform/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/mul" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + input: "cls/predictions/transform/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/mul_1" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/moments/mean" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/sub" + op: "Sub" + input: "cls/predictions/transform/LayerNorm/beta/read" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/add_1" + op: "Add" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul_1" + input: "cls/predictions/transform/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/output_bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 28996 + } + } + } +} +node { + name: "cls/predictions/output_bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/output_bias/Initializer/zeros" + op: "Fill" + input: "cls/predictions/output_bias/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/output_bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/output_bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 28996 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/output_bias/Assign" + op: "Assign" + input: "cls/predictions/output_bias" + input: "cls/predictions/output_bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/read" + op: "Identity" + input: "cls/predictions/output_bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "cls/predictions/MatMul" + op: "MatMul" + input: "cls/predictions/transform/LayerNorm/batchnorm/add_1" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "cls/predictions/BiasAdd" + op: "BiasAdd" + input: "cls/predictions/MatMul" + input: "cls/predictions/output_bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "cls/predictions/LogSoftmax" + op: "LogSoftmax" + input: "cls/predictions/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "cls/predictions/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/predictions/Reshape" + op: "Reshape" + input: "IteratorGetNext:2" + input: "cls/predictions/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "cls/predictions/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/predictions/Reshape_1" + op: "Reshape" + input: "IteratorGetNext:4" + input: "cls/predictions/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "cls/predictions/one_hot/on_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/predictions/one_hot/off_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/one_hot/depth" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 28996 + } + } + } +} +node { + name: "cls/predictions/one_hot" + op: "OneHot" + input: "cls/predictions/Reshape" + input: "cls/predictions/one_hot/depth" + input: "cls/predictions/one_hot/on_value" + input: "cls/predictions/one_hot/off_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "TI" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } + attr { + key: "axis" + value { + i: -1 + } + } +} +node { + name: "cls/predictions/mul" + op: "Mul" + input: "cls/predictions/LogSoftmax" + input: "cls/predictions/one_hot" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "cls/predictions/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/predictions/Sum" + op: "Sum" + input: "cls/predictions/mul" + input: "cls/predictions/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/predictions/Neg" + op: "Neg" + input: "cls/predictions/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "cls/predictions/mul_1" + op: "Mul" + input: "cls/predictions/Reshape_1" + input: "cls/predictions/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "cls/predictions/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "cls/predictions/Sum_1" + op: "Sum" + input: "cls/predictions/mul_1" + input: "cls/predictions/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/predictions/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "cls/predictions/Sum_2" + op: "Sum" + input: "cls/predictions/Reshape_1" + input: "cls/predictions/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/predictions/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999747378752e-06 + } + } + } +} +node { + name: "cls/predictions/add" + op: "Add" + input: "cls/predictions/Sum_2" + input: "cls/predictions/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "cls/predictions/truediv" + op: "RealDiv" + input: "cls/predictions/Sum_1" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mul" + op: "Mul" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/TruncatedNormal" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal" + op: "Add" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mul" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_weights/Assign" + op: "Assign" + input: "cls/seq_relationship/output_weights" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/read" + op: "Identity" + input: "cls/seq_relationship/output_weights" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_bias/Assign" + op: "Assign" + input: "cls/seq_relationship/output_bias" + input: "cls/seq_relationship/output_bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/read" + op: "Identity" + input: "cls/seq_relationship/output_bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/MatMul" + op: "MatMul" + input: "bert/pooler/dense/Tanh" + input: "cls/seq_relationship/output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/BiasAdd" + op: "BiasAdd" + input: "cls/seq_relationship/MatMul" + input: "cls/seq_relationship/output_bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "cls/seq_relationship/LogSoftmax" + op: "LogSoftmax" + input: "cls/seq_relationship/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/seq_relationship/Reshape" + op: "Reshape" + input: "IteratorGetNext:5" + input: "cls/seq_relationship/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/one_hot/on_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/seq_relationship/one_hot/off_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/one_hot/depth" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "cls/seq_relationship/one_hot" + op: "OneHot" + input: "cls/seq_relationship/Reshape" + input: "cls/seq_relationship/one_hot/depth" + input: "cls/seq_relationship/one_hot/on_value" + input: "cls/seq_relationship/one_hot/off_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "TI" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "axis" + value { + i: -1 + } + } +} +node { + name: "cls/seq_relationship/mul" + op: "Mul" + input: "cls/seq_relationship/one_hot" + input: "cls/seq_relationship/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "cls/seq_relationship/Sum" + op: "Sum" + input: "cls/seq_relationship/mul" + input: "cls/seq_relationship/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/seq_relationship/Neg" + op: "Neg" + input: "cls/seq_relationship/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "cls/seq_relationship/Mean" + op: "Mean" + input: "cls/seq_relationship/Neg" + input: "cls/seq_relationship/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "add_1" + op: "Add" + input: "cls/predictions/truediv" + input: "cls/seq_relationship/Mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "checkpoint_initializer/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer" + op: "RestoreV2" + input: "checkpoint_initializer/prefix" + input: "checkpoint_initializer/tensor_names" + input: "checkpoint_initializer/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "checkpoint_initializer" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_1/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_1" + op: "RestoreV2" + input: "checkpoint_initializer_1/prefix" + input: "checkpoint_initializer_1/tensor_names" + input: "checkpoint_initializer_1/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_1" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "checkpoint_initializer_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_2/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/position_embeddings" + } + } + } +} +node { + name: "checkpoint_initializer_2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_2" + op: "RestoreV2" + input: "checkpoint_initializer_2/prefix" + input: "checkpoint_initializer_2/tensor_names" + input: "checkpoint_initializer_2/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_2" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "checkpoint_initializer_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_3/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_3/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/token_type_embeddings" + } + } + } +} +node { + name: "checkpoint_initializer_3/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_3" + op: "RestoreV2" + input: "checkpoint_initializer_3/prefix" + input: "checkpoint_initializer_3/tensor_names" + input: "checkpoint_initializer_3/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_3" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "checkpoint_initializer_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_4/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_4/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/embeddings/word_embeddings" + } + } + } +} +node { + name: "checkpoint_initializer_4/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_4" + op: "RestoreV2" + input: "checkpoint_initializer_4/prefix" + input: "checkpoint_initializer_4/tensor_names" + input: "checkpoint_initializer_4/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_4" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "checkpoint_initializer_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_5/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_5/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_5/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_5" + op: "RestoreV2" + input: "checkpoint_initializer_5/prefix" + input: "checkpoint_initializer_5/tensor_names" + input: "checkpoint_initializer_5/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_5" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_6/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_6/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_6/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_6" + op: "RestoreV2" + input: "checkpoint_initializer_6/prefix" + input: "checkpoint_initializer_6/tensor_names" + input: "checkpoint_initializer_6/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_6" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_7/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_7/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_7/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_7" + op: "RestoreV2" + input: "checkpoint_initializer_7/prefix" + input: "checkpoint_initializer_7/tensor_names" + input: "checkpoint_initializer_7/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_7" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "checkpoint_initializer_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_8/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_8/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_8/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_8" + op: "RestoreV2" + input: "checkpoint_initializer_8/prefix" + input: "checkpoint_initializer_8/tensor_names" + input: "checkpoint_initializer_8/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_8" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "checkpoint_initializer_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_9/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_9/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_9/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_9" + op: "RestoreV2" + input: "checkpoint_initializer_9/prefix" + input: "checkpoint_initializer_9/tensor_names" + input: "checkpoint_initializer_9/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_9" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "checkpoint_initializer_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_10/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_10/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_10/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_10" + op: "RestoreV2" + input: "checkpoint_initializer_10/prefix" + input: "checkpoint_initializer_10/tensor_names" + input: "checkpoint_initializer_10/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_10" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "checkpoint_initializer_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_11/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_11/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_11/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_11" + op: "RestoreV2" + input: "checkpoint_initializer_11/prefix" + input: "checkpoint_initializer_11/tensor_names" + input: "checkpoint_initializer_11/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_11" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "checkpoint_initializer_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_12/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_12/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_12/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_12" + op: "RestoreV2" + input: "checkpoint_initializer_12/prefix" + input: "checkpoint_initializer_12/tensor_names" + input: "checkpoint_initializer_12/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_12" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "checkpoint_initializer_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_13/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_13/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_13/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_13" + op: "RestoreV2" + input: "checkpoint_initializer_13/prefix" + input: "checkpoint_initializer_13/tensor_names" + input: "checkpoint_initializer_13/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_13" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "checkpoint_initializer_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_14/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_14/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_14/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_14" + op: "RestoreV2" + input: "checkpoint_initializer_14/prefix" + input: "checkpoint_initializer_14/tensor_names" + input: "checkpoint_initializer_14/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_14" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "checkpoint_initializer_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_15/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_15/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_15/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_15" + op: "RestoreV2" + input: "checkpoint_initializer_15/prefix" + input: "checkpoint_initializer_15/tensor_names" + input: "checkpoint_initializer_15/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_15" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "checkpoint_initializer_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_16/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_16/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_16/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_16" + op: "RestoreV2" + input: "checkpoint_initializer_16/prefix" + input: "checkpoint_initializer_16/tensor_names" + input: "checkpoint_initializer_16/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_16" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "checkpoint_initializer_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_17/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_17/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_17/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_17" + op: "RestoreV2" + input: "checkpoint_initializer_17/prefix" + input: "checkpoint_initializer_17/tensor_names" + input: "checkpoint_initializer_17/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_17" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "checkpoint_initializer_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_18/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_18/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_18/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_18" + op: "RestoreV2" + input: "checkpoint_initializer_18/prefix" + input: "checkpoint_initializer_18/tensor_names" + input: "checkpoint_initializer_18/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_18" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "checkpoint_initializer_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_19/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_19/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_19/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_19" + op: "RestoreV2" + input: "checkpoint_initializer_19/prefix" + input: "checkpoint_initializer_19/tensor_names" + input: "checkpoint_initializer_19/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_19" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "checkpoint_initializer_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_20/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_20/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_0/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_20/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_20" + op: "RestoreV2" + input: "checkpoint_initializer_20/prefix" + input: "checkpoint_initializer_20/tensor_names" + input: "checkpoint_initializer_20/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_20" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "checkpoint_initializer_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_21/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_21/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_21/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_21" + op: "RestoreV2" + input: "checkpoint_initializer_21/prefix" + input: "checkpoint_initializer_21/tensor_names" + input: "checkpoint_initializer_21/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_21" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_22/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_22/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_22/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_22" + op: "RestoreV2" + input: "checkpoint_initializer_22/prefix" + input: "checkpoint_initializer_22/tensor_names" + input: "checkpoint_initializer_22/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_22" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_23/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_23/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_23/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_23" + op: "RestoreV2" + input: "checkpoint_initializer_23/prefix" + input: "checkpoint_initializer_23/tensor_names" + input: "checkpoint_initializer_23/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_23" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "checkpoint_initializer_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_24/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_24/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_24/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_24" + op: "RestoreV2" + input: "checkpoint_initializer_24/prefix" + input: "checkpoint_initializer_24/tensor_names" + input: "checkpoint_initializer_24/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_24" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "checkpoint_initializer_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_25/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_25/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_25/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_25" + op: "RestoreV2" + input: "checkpoint_initializer_25/prefix" + input: "checkpoint_initializer_25/tensor_names" + input: "checkpoint_initializer_25/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_25" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "checkpoint_initializer_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_26/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_26/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_26/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_26" + op: "RestoreV2" + input: "checkpoint_initializer_26/prefix" + input: "checkpoint_initializer_26/tensor_names" + input: "checkpoint_initializer_26/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_26" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "checkpoint_initializer_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_27/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_27/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_27/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_27" + op: "RestoreV2" + input: "checkpoint_initializer_27/prefix" + input: "checkpoint_initializer_27/tensor_names" + input: "checkpoint_initializer_27/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_27" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "checkpoint_initializer_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_28/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_28/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_28/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_28" + op: "RestoreV2" + input: "checkpoint_initializer_28/prefix" + input: "checkpoint_initializer_28/tensor_names" + input: "checkpoint_initializer_28/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_28" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "checkpoint_initializer_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_29/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_29/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_29/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_29" + op: "RestoreV2" + input: "checkpoint_initializer_29/prefix" + input: "checkpoint_initializer_29/tensor_names" + input: "checkpoint_initializer_29/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_29" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "checkpoint_initializer_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_30/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_30/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_30/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_30" + op: "RestoreV2" + input: "checkpoint_initializer_30/prefix" + input: "checkpoint_initializer_30/tensor_names" + input: "checkpoint_initializer_30/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_30" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "checkpoint_initializer_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_31/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_31/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_31/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_31" + op: "RestoreV2" + input: "checkpoint_initializer_31/prefix" + input: "checkpoint_initializer_31/tensor_names" + input: "checkpoint_initializer_31/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_31" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "checkpoint_initializer_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_32/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_32/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_32/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_32" + op: "RestoreV2" + input: "checkpoint_initializer_32/prefix" + input: "checkpoint_initializer_32/tensor_names" + input: "checkpoint_initializer_32/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_32" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "checkpoint_initializer_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_33/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_33/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_33/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_33" + op: "RestoreV2" + input: "checkpoint_initializer_33/prefix" + input: "checkpoint_initializer_33/tensor_names" + input: "checkpoint_initializer_33/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_33" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "checkpoint_initializer_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_34/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_34/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_34/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_34" + op: "RestoreV2" + input: "checkpoint_initializer_34/prefix" + input: "checkpoint_initializer_34/tensor_names" + input: "checkpoint_initializer_34/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_34" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "checkpoint_initializer_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_35/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_35/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_35/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_35" + op: "RestoreV2" + input: "checkpoint_initializer_35/prefix" + input: "checkpoint_initializer_35/tensor_names" + input: "checkpoint_initializer_35/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_35" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "checkpoint_initializer_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_36/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_36/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_1/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_36/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_36" + op: "RestoreV2" + input: "checkpoint_initializer_36/prefix" + input: "checkpoint_initializer_36/tensor_names" + input: "checkpoint_initializer_36/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_36" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "checkpoint_initializer_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_37/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_37/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_37/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_37" + op: "RestoreV2" + input: "checkpoint_initializer_37/prefix" + input: "checkpoint_initializer_37/tensor_names" + input: "checkpoint_initializer_37/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_37" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_38/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_38/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_38/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_38" + op: "RestoreV2" + input: "checkpoint_initializer_38/prefix" + input: "checkpoint_initializer_38/tensor_names" + input: "checkpoint_initializer_38/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_38" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_39/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_39/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_39/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_39" + op: "RestoreV2" + input: "checkpoint_initializer_39/prefix" + input: "checkpoint_initializer_39/tensor_names" + input: "checkpoint_initializer_39/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_39" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "checkpoint_initializer_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_40/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_40/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_40/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_40" + op: "RestoreV2" + input: "checkpoint_initializer_40/prefix" + input: "checkpoint_initializer_40/tensor_names" + input: "checkpoint_initializer_40/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_40" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "checkpoint_initializer_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_41/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_41/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_41/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_41" + op: "RestoreV2" + input: "checkpoint_initializer_41/prefix" + input: "checkpoint_initializer_41/tensor_names" + input: "checkpoint_initializer_41/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_41" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "checkpoint_initializer_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_42/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_42/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_42/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_42" + op: "RestoreV2" + input: "checkpoint_initializer_42/prefix" + input: "checkpoint_initializer_42/tensor_names" + input: "checkpoint_initializer_42/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_42" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "checkpoint_initializer_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_43/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_43/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_43/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_43" + op: "RestoreV2" + input: "checkpoint_initializer_43/prefix" + input: "checkpoint_initializer_43/tensor_names" + input: "checkpoint_initializer_43/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_43" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "checkpoint_initializer_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_44/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_44/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_44/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_44" + op: "RestoreV2" + input: "checkpoint_initializer_44/prefix" + input: "checkpoint_initializer_44/tensor_names" + input: "checkpoint_initializer_44/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_44" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "checkpoint_initializer_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_45/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_45/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_45/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_45" + op: "RestoreV2" + input: "checkpoint_initializer_45/prefix" + input: "checkpoint_initializer_45/tensor_names" + input: "checkpoint_initializer_45/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_45" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "checkpoint_initializer_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_46/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_46/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_46/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_46" + op: "RestoreV2" + input: "checkpoint_initializer_46/prefix" + input: "checkpoint_initializer_46/tensor_names" + input: "checkpoint_initializer_46/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_46" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "checkpoint_initializer_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_47/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_47/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_47/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_47" + op: "RestoreV2" + input: "checkpoint_initializer_47/prefix" + input: "checkpoint_initializer_47/tensor_names" + input: "checkpoint_initializer_47/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_47" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "checkpoint_initializer_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_48/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_48/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_48/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_48" + op: "RestoreV2" + input: "checkpoint_initializer_48/prefix" + input: "checkpoint_initializer_48/tensor_names" + input: "checkpoint_initializer_48/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_48" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "checkpoint_initializer_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_49/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_49/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_49/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_49" + op: "RestoreV2" + input: "checkpoint_initializer_49/prefix" + input: "checkpoint_initializer_49/tensor_names" + input: "checkpoint_initializer_49/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_49" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "checkpoint_initializer_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_50/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_50/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_50/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_50" + op: "RestoreV2" + input: "checkpoint_initializer_50/prefix" + input: "checkpoint_initializer_50/tensor_names" + input: "checkpoint_initializer_50/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_50" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "checkpoint_initializer_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_51/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_51/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_51/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_51" + op: "RestoreV2" + input: "checkpoint_initializer_51/prefix" + input: "checkpoint_initializer_51/tensor_names" + input: "checkpoint_initializer_51/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_51" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "checkpoint_initializer_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_52/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_52/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_10/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_52/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_52" + op: "RestoreV2" + input: "checkpoint_initializer_52/prefix" + input: "checkpoint_initializer_52/tensor_names" + input: "checkpoint_initializer_52/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_52" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "checkpoint_initializer_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_53/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_53/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_53/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_53" + op: "RestoreV2" + input: "checkpoint_initializer_53/prefix" + input: "checkpoint_initializer_53/tensor_names" + input: "checkpoint_initializer_53/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_53" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_54/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_54/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_54/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_54" + op: "RestoreV2" + input: "checkpoint_initializer_54/prefix" + input: "checkpoint_initializer_54/tensor_names" + input: "checkpoint_initializer_54/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_54" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_55/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_55/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_55/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_55" + op: "RestoreV2" + input: "checkpoint_initializer_55/prefix" + input: "checkpoint_initializer_55/tensor_names" + input: "checkpoint_initializer_55/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_55" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "checkpoint_initializer_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_56/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_56/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_56/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_56" + op: "RestoreV2" + input: "checkpoint_initializer_56/prefix" + input: "checkpoint_initializer_56/tensor_names" + input: "checkpoint_initializer_56/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_56" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "checkpoint_initializer_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_57/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_57/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_57/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_57" + op: "RestoreV2" + input: "checkpoint_initializer_57/prefix" + input: "checkpoint_initializer_57/tensor_names" + input: "checkpoint_initializer_57/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_57" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "checkpoint_initializer_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_58/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_58/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_58/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_58" + op: "RestoreV2" + input: "checkpoint_initializer_58/prefix" + input: "checkpoint_initializer_58/tensor_names" + input: "checkpoint_initializer_58/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_58" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "checkpoint_initializer_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_59/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_59/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_59/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_59" + op: "RestoreV2" + input: "checkpoint_initializer_59/prefix" + input: "checkpoint_initializer_59/tensor_names" + input: "checkpoint_initializer_59/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_59" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "checkpoint_initializer_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_60/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_60/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_60/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_60" + op: "RestoreV2" + input: "checkpoint_initializer_60/prefix" + input: "checkpoint_initializer_60/tensor_names" + input: "checkpoint_initializer_60/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_60" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "checkpoint_initializer_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_61/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_61/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_61/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_61" + op: "RestoreV2" + input: "checkpoint_initializer_61/prefix" + input: "checkpoint_initializer_61/tensor_names" + input: "checkpoint_initializer_61/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_61" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "checkpoint_initializer_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_62/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_62/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_62/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_62" + op: "RestoreV2" + input: "checkpoint_initializer_62/prefix" + input: "checkpoint_initializer_62/tensor_names" + input: "checkpoint_initializer_62/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_62" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "checkpoint_initializer_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_63/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_63/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_63/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_63" + op: "RestoreV2" + input: "checkpoint_initializer_63/prefix" + input: "checkpoint_initializer_63/tensor_names" + input: "checkpoint_initializer_63/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_63" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "checkpoint_initializer_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_64/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_64/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_64/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_64" + op: "RestoreV2" + input: "checkpoint_initializer_64/prefix" + input: "checkpoint_initializer_64/tensor_names" + input: "checkpoint_initializer_64/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_64" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "checkpoint_initializer_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_65/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_65/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_65/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_65" + op: "RestoreV2" + input: "checkpoint_initializer_65/prefix" + input: "checkpoint_initializer_65/tensor_names" + input: "checkpoint_initializer_65/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_65" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "checkpoint_initializer_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_66/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_66/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_66/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_66" + op: "RestoreV2" + input: "checkpoint_initializer_66/prefix" + input: "checkpoint_initializer_66/tensor_names" + input: "checkpoint_initializer_66/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_66" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "checkpoint_initializer_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_67/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_67/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_67/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_67" + op: "RestoreV2" + input: "checkpoint_initializer_67/prefix" + input: "checkpoint_initializer_67/tensor_names" + input: "checkpoint_initializer_67/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_67" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "checkpoint_initializer_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_68/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_68/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_11/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_68/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_68" + op: "RestoreV2" + input: "checkpoint_initializer_68/prefix" + input: "checkpoint_initializer_68/tensor_names" + input: "checkpoint_initializer_68/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_68" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "checkpoint_initializer_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_69/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_69/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_69/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_69" + op: "RestoreV2" + input: "checkpoint_initializer_69/prefix" + input: "checkpoint_initializer_69/tensor_names" + input: "checkpoint_initializer_69/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_69" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_70/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_70/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_70/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_70" + op: "RestoreV2" + input: "checkpoint_initializer_70/prefix" + input: "checkpoint_initializer_70/tensor_names" + input: "checkpoint_initializer_70/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_70" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_71/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_71/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_71/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_71" + op: "RestoreV2" + input: "checkpoint_initializer_71/prefix" + input: "checkpoint_initializer_71/tensor_names" + input: "checkpoint_initializer_71/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_71" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "checkpoint_initializer_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_72/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_72/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_72/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_72" + op: "RestoreV2" + input: "checkpoint_initializer_72/prefix" + input: "checkpoint_initializer_72/tensor_names" + input: "checkpoint_initializer_72/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_72" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "checkpoint_initializer_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_73/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_73/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_73/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_73" + op: "RestoreV2" + input: "checkpoint_initializer_73/prefix" + input: "checkpoint_initializer_73/tensor_names" + input: "checkpoint_initializer_73/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_73" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "checkpoint_initializer_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_74/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_74/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_74/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_74" + op: "RestoreV2" + input: "checkpoint_initializer_74/prefix" + input: "checkpoint_initializer_74/tensor_names" + input: "checkpoint_initializer_74/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_74" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "checkpoint_initializer_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_75/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_75/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_75/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_75" + op: "RestoreV2" + input: "checkpoint_initializer_75/prefix" + input: "checkpoint_initializer_75/tensor_names" + input: "checkpoint_initializer_75/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_75" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "checkpoint_initializer_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_76/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_76/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_76/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_76" + op: "RestoreV2" + input: "checkpoint_initializer_76/prefix" + input: "checkpoint_initializer_76/tensor_names" + input: "checkpoint_initializer_76/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_76" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "checkpoint_initializer_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_77/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_77/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_77/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_77" + op: "RestoreV2" + input: "checkpoint_initializer_77/prefix" + input: "checkpoint_initializer_77/tensor_names" + input: "checkpoint_initializer_77/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_77" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "checkpoint_initializer_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_78/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_78/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_78/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_78" + op: "RestoreV2" + input: "checkpoint_initializer_78/prefix" + input: "checkpoint_initializer_78/tensor_names" + input: "checkpoint_initializer_78/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_78" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "checkpoint_initializer_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_79/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_79/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_79/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_79" + op: "RestoreV2" + input: "checkpoint_initializer_79/prefix" + input: "checkpoint_initializer_79/tensor_names" + input: "checkpoint_initializer_79/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_79" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "checkpoint_initializer_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_80/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_80/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_80/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_80" + op: "RestoreV2" + input: "checkpoint_initializer_80/prefix" + input: "checkpoint_initializer_80/tensor_names" + input: "checkpoint_initializer_80/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_80" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "checkpoint_initializer_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_81/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_81/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_81/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_81" + op: "RestoreV2" + input: "checkpoint_initializer_81/prefix" + input: "checkpoint_initializer_81/tensor_names" + input: "checkpoint_initializer_81/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_81" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "checkpoint_initializer_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_82/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_82/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_82/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_82" + op: "RestoreV2" + input: "checkpoint_initializer_82/prefix" + input: "checkpoint_initializer_82/tensor_names" + input: "checkpoint_initializer_82/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_82" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "checkpoint_initializer_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_83/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_83/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_83/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_83" + op: "RestoreV2" + input: "checkpoint_initializer_83/prefix" + input: "checkpoint_initializer_83/tensor_names" + input: "checkpoint_initializer_83/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_83" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "checkpoint_initializer_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_84/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_84/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_2/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_84/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_84" + op: "RestoreV2" + input: "checkpoint_initializer_84/prefix" + input: "checkpoint_initializer_84/tensor_names" + input: "checkpoint_initializer_84/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_84" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "checkpoint_initializer_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_85/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_85/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_85/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_85" + op: "RestoreV2" + input: "checkpoint_initializer_85/prefix" + input: "checkpoint_initializer_85/tensor_names" + input: "checkpoint_initializer_85/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_85" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_86/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_86/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_86/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_86" + op: "RestoreV2" + input: "checkpoint_initializer_86/prefix" + input: "checkpoint_initializer_86/tensor_names" + input: "checkpoint_initializer_86/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_86" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_87/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_87/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_87/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_87" + op: "RestoreV2" + input: "checkpoint_initializer_87/prefix" + input: "checkpoint_initializer_87/tensor_names" + input: "checkpoint_initializer_87/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_87" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "checkpoint_initializer_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_88/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_88/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_88/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_88" + op: "RestoreV2" + input: "checkpoint_initializer_88/prefix" + input: "checkpoint_initializer_88/tensor_names" + input: "checkpoint_initializer_88/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_88" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "checkpoint_initializer_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_89/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_89/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_89/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_89" + op: "RestoreV2" + input: "checkpoint_initializer_89/prefix" + input: "checkpoint_initializer_89/tensor_names" + input: "checkpoint_initializer_89/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_89" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "checkpoint_initializer_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_90/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_90/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_90/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_90" + op: "RestoreV2" + input: "checkpoint_initializer_90/prefix" + input: "checkpoint_initializer_90/tensor_names" + input: "checkpoint_initializer_90/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_90" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "checkpoint_initializer_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_91/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_91/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_91/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_91" + op: "RestoreV2" + input: "checkpoint_initializer_91/prefix" + input: "checkpoint_initializer_91/tensor_names" + input: "checkpoint_initializer_91/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_91" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "checkpoint_initializer_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_92/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_92/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_92/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_92" + op: "RestoreV2" + input: "checkpoint_initializer_92/prefix" + input: "checkpoint_initializer_92/tensor_names" + input: "checkpoint_initializer_92/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_92" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "checkpoint_initializer_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_93/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_93/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_93/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_93" + op: "RestoreV2" + input: "checkpoint_initializer_93/prefix" + input: "checkpoint_initializer_93/tensor_names" + input: "checkpoint_initializer_93/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_93" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "checkpoint_initializer_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_94/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_94/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_94/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_94" + op: "RestoreV2" + input: "checkpoint_initializer_94/prefix" + input: "checkpoint_initializer_94/tensor_names" + input: "checkpoint_initializer_94/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_94" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "checkpoint_initializer_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_95/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_95/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_95/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_95" + op: "RestoreV2" + input: "checkpoint_initializer_95/prefix" + input: "checkpoint_initializer_95/tensor_names" + input: "checkpoint_initializer_95/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_95" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "checkpoint_initializer_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_96/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_96/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_96/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_96" + op: "RestoreV2" + input: "checkpoint_initializer_96/prefix" + input: "checkpoint_initializer_96/tensor_names" + input: "checkpoint_initializer_96/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_96" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "checkpoint_initializer_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_97/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_97/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_97/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_97" + op: "RestoreV2" + input: "checkpoint_initializer_97/prefix" + input: "checkpoint_initializer_97/tensor_names" + input: "checkpoint_initializer_97/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_97" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "checkpoint_initializer_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_98/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_98/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_98/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_98" + op: "RestoreV2" + input: "checkpoint_initializer_98/prefix" + input: "checkpoint_initializer_98/tensor_names" + input: "checkpoint_initializer_98/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_98" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "checkpoint_initializer_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_99/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_99/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_99/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_99" + op: "RestoreV2" + input: "checkpoint_initializer_99/prefix" + input: "checkpoint_initializer_99/tensor_names" + input: "checkpoint_initializer_99/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_99" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "checkpoint_initializer_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_100/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_100/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_3/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_100/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_100" + op: "RestoreV2" + input: "checkpoint_initializer_100/prefix" + input: "checkpoint_initializer_100/tensor_names" + input: "checkpoint_initializer_100/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_100" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "checkpoint_initializer_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_101/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_101/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_101/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_101" + op: "RestoreV2" + input: "checkpoint_initializer_101/prefix" + input: "checkpoint_initializer_101/tensor_names" + input: "checkpoint_initializer_101/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_101" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_102/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_102/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_102/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_102" + op: "RestoreV2" + input: "checkpoint_initializer_102/prefix" + input: "checkpoint_initializer_102/tensor_names" + input: "checkpoint_initializer_102/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_102" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_103/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_103/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_103/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_103" + op: "RestoreV2" + input: "checkpoint_initializer_103/prefix" + input: "checkpoint_initializer_103/tensor_names" + input: "checkpoint_initializer_103/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_103" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "checkpoint_initializer_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_104/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_104/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_104/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_104" + op: "RestoreV2" + input: "checkpoint_initializer_104/prefix" + input: "checkpoint_initializer_104/tensor_names" + input: "checkpoint_initializer_104/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_104" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "checkpoint_initializer_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_105/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_105/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_105/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_105" + op: "RestoreV2" + input: "checkpoint_initializer_105/prefix" + input: "checkpoint_initializer_105/tensor_names" + input: "checkpoint_initializer_105/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_105" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "checkpoint_initializer_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_106/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_106/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_106/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_106" + op: "RestoreV2" + input: "checkpoint_initializer_106/prefix" + input: "checkpoint_initializer_106/tensor_names" + input: "checkpoint_initializer_106/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_106" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "checkpoint_initializer_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_107/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_107/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_107/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_107" + op: "RestoreV2" + input: "checkpoint_initializer_107/prefix" + input: "checkpoint_initializer_107/tensor_names" + input: "checkpoint_initializer_107/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_107" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "checkpoint_initializer_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_108/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_108/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_108/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_108" + op: "RestoreV2" + input: "checkpoint_initializer_108/prefix" + input: "checkpoint_initializer_108/tensor_names" + input: "checkpoint_initializer_108/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_108" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "checkpoint_initializer_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_109/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_109/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_109/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_109" + op: "RestoreV2" + input: "checkpoint_initializer_109/prefix" + input: "checkpoint_initializer_109/tensor_names" + input: "checkpoint_initializer_109/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_109" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "checkpoint_initializer_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_110/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_110/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_110/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_110" + op: "RestoreV2" + input: "checkpoint_initializer_110/prefix" + input: "checkpoint_initializer_110/tensor_names" + input: "checkpoint_initializer_110/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_110" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "checkpoint_initializer_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_111/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_111/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_111/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_111" + op: "RestoreV2" + input: "checkpoint_initializer_111/prefix" + input: "checkpoint_initializer_111/tensor_names" + input: "checkpoint_initializer_111/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_111" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "checkpoint_initializer_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_112/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_112/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_112/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_112" + op: "RestoreV2" + input: "checkpoint_initializer_112/prefix" + input: "checkpoint_initializer_112/tensor_names" + input: "checkpoint_initializer_112/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_112" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "checkpoint_initializer_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_113/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_113/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_113/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_113" + op: "RestoreV2" + input: "checkpoint_initializer_113/prefix" + input: "checkpoint_initializer_113/tensor_names" + input: "checkpoint_initializer_113/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_113" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "checkpoint_initializer_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_114/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_114/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_114/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_114" + op: "RestoreV2" + input: "checkpoint_initializer_114/prefix" + input: "checkpoint_initializer_114/tensor_names" + input: "checkpoint_initializer_114/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_114" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "checkpoint_initializer_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_115/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_115/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_115/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_115" + op: "RestoreV2" + input: "checkpoint_initializer_115/prefix" + input: "checkpoint_initializer_115/tensor_names" + input: "checkpoint_initializer_115/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_115" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "checkpoint_initializer_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_116/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_116/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_4/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_116/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_116" + op: "RestoreV2" + input: "checkpoint_initializer_116/prefix" + input: "checkpoint_initializer_116/tensor_names" + input: "checkpoint_initializer_116/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_116" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "checkpoint_initializer_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_117/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_117/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_117/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_117" + op: "RestoreV2" + input: "checkpoint_initializer_117/prefix" + input: "checkpoint_initializer_117/tensor_names" + input: "checkpoint_initializer_117/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_117" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_118/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_118/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_118/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_118" + op: "RestoreV2" + input: "checkpoint_initializer_118/prefix" + input: "checkpoint_initializer_118/tensor_names" + input: "checkpoint_initializer_118/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_118" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_119/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_119/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_119/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_119" + op: "RestoreV2" + input: "checkpoint_initializer_119/prefix" + input: "checkpoint_initializer_119/tensor_names" + input: "checkpoint_initializer_119/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_119" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "checkpoint_initializer_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_120/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_120/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_120/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_120" + op: "RestoreV2" + input: "checkpoint_initializer_120/prefix" + input: "checkpoint_initializer_120/tensor_names" + input: "checkpoint_initializer_120/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_120" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "checkpoint_initializer_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_121/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_121/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_121/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_121" + op: "RestoreV2" + input: "checkpoint_initializer_121/prefix" + input: "checkpoint_initializer_121/tensor_names" + input: "checkpoint_initializer_121/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_121" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "checkpoint_initializer_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_122/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_122/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_122/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_122" + op: "RestoreV2" + input: "checkpoint_initializer_122/prefix" + input: "checkpoint_initializer_122/tensor_names" + input: "checkpoint_initializer_122/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_122" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "checkpoint_initializer_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_123/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_123/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_123/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_123" + op: "RestoreV2" + input: "checkpoint_initializer_123/prefix" + input: "checkpoint_initializer_123/tensor_names" + input: "checkpoint_initializer_123/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_123" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "checkpoint_initializer_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_124/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_124/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_124/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_124" + op: "RestoreV2" + input: "checkpoint_initializer_124/prefix" + input: "checkpoint_initializer_124/tensor_names" + input: "checkpoint_initializer_124/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_124" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "checkpoint_initializer_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_125/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_125/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_125/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_125" + op: "RestoreV2" + input: "checkpoint_initializer_125/prefix" + input: "checkpoint_initializer_125/tensor_names" + input: "checkpoint_initializer_125/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_125" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "checkpoint_initializer_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_126/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_126/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_126/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_126" + op: "RestoreV2" + input: "checkpoint_initializer_126/prefix" + input: "checkpoint_initializer_126/tensor_names" + input: "checkpoint_initializer_126/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_126" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "checkpoint_initializer_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_127/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_127/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_127/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_127" + op: "RestoreV2" + input: "checkpoint_initializer_127/prefix" + input: "checkpoint_initializer_127/tensor_names" + input: "checkpoint_initializer_127/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_127" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "checkpoint_initializer_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_128/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_128/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_128/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_128" + op: "RestoreV2" + input: "checkpoint_initializer_128/prefix" + input: "checkpoint_initializer_128/tensor_names" + input: "checkpoint_initializer_128/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_128" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "checkpoint_initializer_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_129/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_129/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_129/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_129" + op: "RestoreV2" + input: "checkpoint_initializer_129/prefix" + input: "checkpoint_initializer_129/tensor_names" + input: "checkpoint_initializer_129/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_129" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "checkpoint_initializer_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_130/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_130/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_130/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_130" + op: "RestoreV2" + input: "checkpoint_initializer_130/prefix" + input: "checkpoint_initializer_130/tensor_names" + input: "checkpoint_initializer_130/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_130" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "checkpoint_initializer_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_131/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_131/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_131/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_131" + op: "RestoreV2" + input: "checkpoint_initializer_131/prefix" + input: "checkpoint_initializer_131/tensor_names" + input: "checkpoint_initializer_131/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_131" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "checkpoint_initializer_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_132/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_132/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_5/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_132/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_132" + op: "RestoreV2" + input: "checkpoint_initializer_132/prefix" + input: "checkpoint_initializer_132/tensor_names" + input: "checkpoint_initializer_132/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_132" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "checkpoint_initializer_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_133/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_133/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_133/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_133" + op: "RestoreV2" + input: "checkpoint_initializer_133/prefix" + input: "checkpoint_initializer_133/tensor_names" + input: "checkpoint_initializer_133/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_133" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_134/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_134/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_134/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_134" + op: "RestoreV2" + input: "checkpoint_initializer_134/prefix" + input: "checkpoint_initializer_134/tensor_names" + input: "checkpoint_initializer_134/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_134" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_135/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_135/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_135/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_135" + op: "RestoreV2" + input: "checkpoint_initializer_135/prefix" + input: "checkpoint_initializer_135/tensor_names" + input: "checkpoint_initializer_135/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_135" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "checkpoint_initializer_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_136/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_136/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_136/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_136" + op: "RestoreV2" + input: "checkpoint_initializer_136/prefix" + input: "checkpoint_initializer_136/tensor_names" + input: "checkpoint_initializer_136/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_136" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "checkpoint_initializer_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_137/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_137/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_137/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_137" + op: "RestoreV2" + input: "checkpoint_initializer_137/prefix" + input: "checkpoint_initializer_137/tensor_names" + input: "checkpoint_initializer_137/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_137" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "checkpoint_initializer_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_138/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_138/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_138/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_138" + op: "RestoreV2" + input: "checkpoint_initializer_138/prefix" + input: "checkpoint_initializer_138/tensor_names" + input: "checkpoint_initializer_138/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_138" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "checkpoint_initializer_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_139/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_139/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_139/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_139" + op: "RestoreV2" + input: "checkpoint_initializer_139/prefix" + input: "checkpoint_initializer_139/tensor_names" + input: "checkpoint_initializer_139/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_139" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "checkpoint_initializer_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_140/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_140/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_140/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_140" + op: "RestoreV2" + input: "checkpoint_initializer_140/prefix" + input: "checkpoint_initializer_140/tensor_names" + input: "checkpoint_initializer_140/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_140" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "checkpoint_initializer_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_141/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_141/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_141/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_141" + op: "RestoreV2" + input: "checkpoint_initializer_141/prefix" + input: "checkpoint_initializer_141/tensor_names" + input: "checkpoint_initializer_141/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_141" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "checkpoint_initializer_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_142/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_142/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_142/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_142" + op: "RestoreV2" + input: "checkpoint_initializer_142/prefix" + input: "checkpoint_initializer_142/tensor_names" + input: "checkpoint_initializer_142/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_142" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "checkpoint_initializer_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_143/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_143/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_143/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_143" + op: "RestoreV2" + input: "checkpoint_initializer_143/prefix" + input: "checkpoint_initializer_143/tensor_names" + input: "checkpoint_initializer_143/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_143" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "checkpoint_initializer_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_144/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_144/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_144/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_144" + op: "RestoreV2" + input: "checkpoint_initializer_144/prefix" + input: "checkpoint_initializer_144/tensor_names" + input: "checkpoint_initializer_144/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_144" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "checkpoint_initializer_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_145/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_145/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_145/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_145" + op: "RestoreV2" + input: "checkpoint_initializer_145/prefix" + input: "checkpoint_initializer_145/tensor_names" + input: "checkpoint_initializer_145/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_145" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "checkpoint_initializer_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_146/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_146/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_146/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_146" + op: "RestoreV2" + input: "checkpoint_initializer_146/prefix" + input: "checkpoint_initializer_146/tensor_names" + input: "checkpoint_initializer_146/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_146" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "checkpoint_initializer_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_147/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_147/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_147/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_147" + op: "RestoreV2" + input: "checkpoint_initializer_147/prefix" + input: "checkpoint_initializer_147/tensor_names" + input: "checkpoint_initializer_147/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_147" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "checkpoint_initializer_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_148/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_148/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_6/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_148/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_148" + op: "RestoreV2" + input: "checkpoint_initializer_148/prefix" + input: "checkpoint_initializer_148/tensor_names" + input: "checkpoint_initializer_148/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_148" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "checkpoint_initializer_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_149/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_149/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_149/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_149" + op: "RestoreV2" + input: "checkpoint_initializer_149/prefix" + input: "checkpoint_initializer_149/tensor_names" + input: "checkpoint_initializer_149/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_149" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_150/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_150/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_150/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_150" + op: "RestoreV2" + input: "checkpoint_initializer_150/prefix" + input: "checkpoint_initializer_150/tensor_names" + input: "checkpoint_initializer_150/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_150" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_151/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_151/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_151/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_151" + op: "RestoreV2" + input: "checkpoint_initializer_151/prefix" + input: "checkpoint_initializer_151/tensor_names" + input: "checkpoint_initializer_151/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_151" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "checkpoint_initializer_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_152/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_152/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_152/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_152" + op: "RestoreV2" + input: "checkpoint_initializer_152/prefix" + input: "checkpoint_initializer_152/tensor_names" + input: "checkpoint_initializer_152/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_152" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "checkpoint_initializer_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_153/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_153/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_153/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_153" + op: "RestoreV2" + input: "checkpoint_initializer_153/prefix" + input: "checkpoint_initializer_153/tensor_names" + input: "checkpoint_initializer_153/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_153" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "checkpoint_initializer_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_154/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_154/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_154/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_154" + op: "RestoreV2" + input: "checkpoint_initializer_154/prefix" + input: "checkpoint_initializer_154/tensor_names" + input: "checkpoint_initializer_154/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_154" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "checkpoint_initializer_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_155/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_155/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_155/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_155" + op: "RestoreV2" + input: "checkpoint_initializer_155/prefix" + input: "checkpoint_initializer_155/tensor_names" + input: "checkpoint_initializer_155/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_155" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "checkpoint_initializer_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_156/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_156/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_156/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_156" + op: "RestoreV2" + input: "checkpoint_initializer_156/prefix" + input: "checkpoint_initializer_156/tensor_names" + input: "checkpoint_initializer_156/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_156" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "checkpoint_initializer_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_157/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_157/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_157/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_157" + op: "RestoreV2" + input: "checkpoint_initializer_157/prefix" + input: "checkpoint_initializer_157/tensor_names" + input: "checkpoint_initializer_157/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_157" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "checkpoint_initializer_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_158/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_158/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_158/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_158" + op: "RestoreV2" + input: "checkpoint_initializer_158/prefix" + input: "checkpoint_initializer_158/tensor_names" + input: "checkpoint_initializer_158/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_158" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "checkpoint_initializer_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_159/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_159/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_159/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_159" + op: "RestoreV2" + input: "checkpoint_initializer_159/prefix" + input: "checkpoint_initializer_159/tensor_names" + input: "checkpoint_initializer_159/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_159" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "checkpoint_initializer_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_160/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_160/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_160/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_160" + op: "RestoreV2" + input: "checkpoint_initializer_160/prefix" + input: "checkpoint_initializer_160/tensor_names" + input: "checkpoint_initializer_160/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_160" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "checkpoint_initializer_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_161/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_161/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_161/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_161" + op: "RestoreV2" + input: "checkpoint_initializer_161/prefix" + input: "checkpoint_initializer_161/tensor_names" + input: "checkpoint_initializer_161/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_161" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "checkpoint_initializer_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_162/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_162/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_162/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_162" + op: "RestoreV2" + input: "checkpoint_initializer_162/prefix" + input: "checkpoint_initializer_162/tensor_names" + input: "checkpoint_initializer_162/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_162" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "checkpoint_initializer_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_163/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_163/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_163/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_163" + op: "RestoreV2" + input: "checkpoint_initializer_163/prefix" + input: "checkpoint_initializer_163/tensor_names" + input: "checkpoint_initializer_163/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_163" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "checkpoint_initializer_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_164/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_164/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_7/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_164/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_164" + op: "RestoreV2" + input: "checkpoint_initializer_164/prefix" + input: "checkpoint_initializer_164/tensor_names" + input: "checkpoint_initializer_164/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_164" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "checkpoint_initializer_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_165/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_165/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_165/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_165" + op: "RestoreV2" + input: "checkpoint_initializer_165/prefix" + input: "checkpoint_initializer_165/tensor_names" + input: "checkpoint_initializer_165/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_165" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_166/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_166/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_166/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_166" + op: "RestoreV2" + input: "checkpoint_initializer_166/prefix" + input: "checkpoint_initializer_166/tensor_names" + input: "checkpoint_initializer_166/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_166" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_167/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_167/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_167/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_167" + op: "RestoreV2" + input: "checkpoint_initializer_167/prefix" + input: "checkpoint_initializer_167/tensor_names" + input: "checkpoint_initializer_167/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_167" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "checkpoint_initializer_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_168/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_168/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_168/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_168" + op: "RestoreV2" + input: "checkpoint_initializer_168/prefix" + input: "checkpoint_initializer_168/tensor_names" + input: "checkpoint_initializer_168/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_168" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "checkpoint_initializer_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_169/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_169/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_169/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_169" + op: "RestoreV2" + input: "checkpoint_initializer_169/prefix" + input: "checkpoint_initializer_169/tensor_names" + input: "checkpoint_initializer_169/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_169" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "checkpoint_initializer_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_170/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_170/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_170/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_170" + op: "RestoreV2" + input: "checkpoint_initializer_170/prefix" + input: "checkpoint_initializer_170/tensor_names" + input: "checkpoint_initializer_170/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_170" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "checkpoint_initializer_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_171/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_171/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_171/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_171" + op: "RestoreV2" + input: "checkpoint_initializer_171/prefix" + input: "checkpoint_initializer_171/tensor_names" + input: "checkpoint_initializer_171/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_171" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "checkpoint_initializer_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_172/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_172/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_172/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_172" + op: "RestoreV2" + input: "checkpoint_initializer_172/prefix" + input: "checkpoint_initializer_172/tensor_names" + input: "checkpoint_initializer_172/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_172" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "checkpoint_initializer_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_173/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_173/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_173/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_173" + op: "RestoreV2" + input: "checkpoint_initializer_173/prefix" + input: "checkpoint_initializer_173/tensor_names" + input: "checkpoint_initializer_173/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_173" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "checkpoint_initializer_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_174/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_174/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_174/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_174" + op: "RestoreV2" + input: "checkpoint_initializer_174/prefix" + input: "checkpoint_initializer_174/tensor_names" + input: "checkpoint_initializer_174/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_174" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "checkpoint_initializer_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_175/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_175/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_175/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_175" + op: "RestoreV2" + input: "checkpoint_initializer_175/prefix" + input: "checkpoint_initializer_175/tensor_names" + input: "checkpoint_initializer_175/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_175" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "checkpoint_initializer_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_176/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_176/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_176/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_176" + op: "RestoreV2" + input: "checkpoint_initializer_176/prefix" + input: "checkpoint_initializer_176/tensor_names" + input: "checkpoint_initializer_176/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_176" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "checkpoint_initializer_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_177/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_177/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_177/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_177" + op: "RestoreV2" + input: "checkpoint_initializer_177/prefix" + input: "checkpoint_initializer_177/tensor_names" + input: "checkpoint_initializer_177/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_177" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "checkpoint_initializer_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_178/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_178/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_178/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_178" + op: "RestoreV2" + input: "checkpoint_initializer_178/prefix" + input: "checkpoint_initializer_178/tensor_names" + input: "checkpoint_initializer_178/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_178" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "checkpoint_initializer_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_179/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_179/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_179/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_179" + op: "RestoreV2" + input: "checkpoint_initializer_179/prefix" + input: "checkpoint_initializer_179/tensor_names" + input: "checkpoint_initializer_179/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_179" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias" + input: "checkpoint_initializer_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_180/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_180/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_8/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_180/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_180" + op: "RestoreV2" + input: "checkpoint_initializer_180/prefix" + input: "checkpoint_initializer_180/tensor_names" + input: "checkpoint_initializer_180/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_180" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "checkpoint_initializer_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_181/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_181/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_181/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_181" + op: "RestoreV2" + input: "checkpoint_initializer_181/prefix" + input: "checkpoint_initializer_181/tensor_names" + input: "checkpoint_initializer_181/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_181" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "checkpoint_initializer_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_182/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_182/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_182/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_182" + op: "RestoreV2" + input: "checkpoint_initializer_182/prefix" + input: "checkpoint_initializer_182/tensor_names" + input: "checkpoint_initializer_182/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_182" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "checkpoint_initializer_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_183/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_183/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_183/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_183" + op: "RestoreV2" + input: "checkpoint_initializer_183/prefix" + input: "checkpoint_initializer_183/tensor_names" + input: "checkpoint_initializer_183/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_183" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "checkpoint_initializer_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_184/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_184/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_184/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_184" + op: "RestoreV2" + input: "checkpoint_initializer_184/prefix" + input: "checkpoint_initializer_184/tensor_names" + input: "checkpoint_initializer_184/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_184" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "checkpoint_initializer_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_185/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_185/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/key/bias" + } + } + } +} +node { + name: "checkpoint_initializer_185/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_185" + op: "RestoreV2" + input: "checkpoint_initializer_185/prefix" + input: "checkpoint_initializer_185/tensor_names" + input: "checkpoint_initializer_185/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_185" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "checkpoint_initializer_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_186/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_186/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_186/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_186" + op: "RestoreV2" + input: "checkpoint_initializer_186/prefix" + input: "checkpoint_initializer_186/tensor_names" + input: "checkpoint_initializer_186/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_186" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "checkpoint_initializer_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_187/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_187/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/query/bias" + } + } + } +} +node { + name: "checkpoint_initializer_187/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_187" + op: "RestoreV2" + input: "checkpoint_initializer_187/prefix" + input: "checkpoint_initializer_187/tensor_names" + input: "checkpoint_initializer_187/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_187" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "checkpoint_initializer_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_188/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_188/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_188/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_188" + op: "RestoreV2" + input: "checkpoint_initializer_188/prefix" + input: "checkpoint_initializer_188/tensor_names" + input: "checkpoint_initializer_188/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_188" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "checkpoint_initializer_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_189/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_189/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/value/bias" + } + } + } +} +node { + name: "checkpoint_initializer_189/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_189" + op: "RestoreV2" + input: "checkpoint_initializer_189/prefix" + input: "checkpoint_initializer_189/tensor_names" + input: "checkpoint_initializer_189/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_189" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "checkpoint_initializer_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_190/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_190/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_190/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_190" + op: "RestoreV2" + input: "checkpoint_initializer_190/prefix" + input: "checkpoint_initializer_190/tensor_names" + input: "checkpoint_initializer_190/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_190" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "checkpoint_initializer_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_191/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_191/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_191/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_191" + op: "RestoreV2" + input: "checkpoint_initializer_191/prefix" + input: "checkpoint_initializer_191/tensor_names" + input: "checkpoint_initializer_191/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_191" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "checkpoint_initializer_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_192/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_192/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_192/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_192" + op: "RestoreV2" + input: "checkpoint_initializer_192/prefix" + input: "checkpoint_initializer_192/tensor_names" + input: "checkpoint_initializer_192/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_192" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "checkpoint_initializer_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_193/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_193/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_193/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_193" + op: "RestoreV2" + input: "checkpoint_initializer_193/prefix" + input: "checkpoint_initializer_193/tensor_names" + input: "checkpoint_initializer_193/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_193" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "checkpoint_initializer_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_194/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_194/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_194/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_194" + op: "RestoreV2" + input: "checkpoint_initializer_194/prefix" + input: "checkpoint_initializer_194/tensor_names" + input: "checkpoint_initializer_194/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_194" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "checkpoint_initializer_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_195/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_195/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_195/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_195" + op: "RestoreV2" + input: "checkpoint_initializer_195/prefix" + input: "checkpoint_initializer_195/tensor_names" + input: "checkpoint_initializer_195/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_195" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "checkpoint_initializer_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_196/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_196/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/encoder/layer_9/output/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_196/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_196" + op: "RestoreV2" + input: "checkpoint_initializer_196/prefix" + input: "checkpoint_initializer_196/tensor_names" + input: "checkpoint_initializer_196/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_196" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "checkpoint_initializer_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_197/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_197/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/pooler/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_197/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_197" + op: "RestoreV2" + input: "checkpoint_initializer_197/prefix" + input: "checkpoint_initializer_197/tensor_names" + input: "checkpoint_initializer_197/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_197" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "checkpoint_initializer_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_198/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_198/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "bert/pooler/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_198/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_198" + op: "RestoreV2" + input: "checkpoint_initializer_198/prefix" + input: "checkpoint_initializer_198/tensor_names" + input: "checkpoint_initializer_198/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_198" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "checkpoint_initializer_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_199/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_199/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "cls/predictions/output_bias" + } + } + } +} +node { + name: "checkpoint_initializer_199/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_199" + op: "RestoreV2" + input: "checkpoint_initializer_199/prefix" + input: "checkpoint_initializer_199/tensor_names" + input: "checkpoint_initializer_199/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_199" + op: "Assign" + input: "cls/predictions/output_bias" + input: "checkpoint_initializer_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_200/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_200/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "cls/predictions/transform/LayerNorm/beta" + } + } + } +} +node { + name: "checkpoint_initializer_200/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_200" + op: "RestoreV2" + input: "checkpoint_initializer_200/prefix" + input: "checkpoint_initializer_200/tensor_names" + input: "checkpoint_initializer_200/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_200" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta" + input: "checkpoint_initializer_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_201/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_201/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "cls/predictions/transform/LayerNorm/gamma" + } + } + } +} +node { + name: "checkpoint_initializer_201/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_201" + op: "RestoreV2" + input: "checkpoint_initializer_201/prefix" + input: "checkpoint_initializer_201/tensor_names" + input: "checkpoint_initializer_201/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_201" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "checkpoint_initializer_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_202/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_202/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "cls/predictions/transform/dense/bias" + } + } + } +} +node { + name: "checkpoint_initializer_202/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_202" + op: "RestoreV2" + input: "checkpoint_initializer_202/prefix" + input: "checkpoint_initializer_202/tensor_names" + input: "checkpoint_initializer_202/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_202" + op: "Assign" + input: "cls/predictions/transform/dense/bias" + input: "checkpoint_initializer_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_203/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_203/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "cls/predictions/transform/dense/kernel" + } + } + } +} +node { + name: "checkpoint_initializer_203/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_203" + op: "RestoreV2" + input: "checkpoint_initializer_203/prefix" + input: "checkpoint_initializer_203/tensor_names" + input: "checkpoint_initializer_203/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_203" + op: "Assign" + input: "cls/predictions/transform/dense/kernel" + input: "checkpoint_initializer_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_204/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_204/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "cls/seq_relationship/output_bias" + } + } + } +} +node { + name: "checkpoint_initializer_204/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_204" + op: "RestoreV2" + input: "checkpoint_initializer_204/prefix" + input: "checkpoint_initializer_204/tensor_names" + input: "checkpoint_initializer_204/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_204" + op: "Assign" + input: "cls/seq_relationship/output_bias" + input: "checkpoint_initializer_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "checkpoint_initializer_205/prefix" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/home/ema30/raid1/clinical_BERT/clinical_BERT/models/biobert/pubmed_pmc_470k/biobert_model.ckpt" + } + } + } +} +node { + name: "checkpoint_initializer_205/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "cls/seq_relationship/output_weights" + } + } + } +} +node { + name: "checkpoint_initializer_205/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } +} +node { + name: "checkpoint_initializer_205" + op: "RestoreV2" + input: "checkpoint_initializer_205/prefix" + input: "checkpoint_initializer_205/tensor_names" + input: "checkpoint_initializer_205/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + } + } + } +} +node { + name: "Assign_205" + op: "Assign" + input: "cls/seq_relationship/output_weights" + input: "checkpoint_initializer_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 4.999999873689376e-05 + } + } + } +} +node { + name: "PolynomialDecay/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "PolynomialDecay/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "PolynomialDecay/Cast_2/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "PolynomialDecay/Cast_2" + op: "Cast" + input: "PolynomialDecay/Cast_2/ReadVariableOp" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Cast_3/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 150000 + } + } + } +} +node { + name: "PolynomialDecay/Cast_3" + op: "Cast" + input: "PolynomialDecay/Cast_3/x" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Minimum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 150000.0 + } + } + } +} +node { + name: "PolynomialDecay/Minimum" + op: "Minimum" + input: "PolynomialDecay/Cast_2" + input: "PolynomialDecay/Minimum/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/div" + op: "RealDiv" + input: "PolynomialDecay/Minimum" + input: "PolynomialDecay/Cast_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/sub" + op: "Sub" + input: "Const_1" + input: "PolynomialDecay/Cast/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/sub_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "PolynomialDecay/sub_1" + op: "Sub" + input: "PolynomialDecay/sub_1/x" + input: "PolynomialDecay/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Pow" + op: "Pow" + input: "PolynomialDecay/sub_1" + input: "PolynomialDecay/Cast_1/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Mul" + op: "Mul" + input: "PolynomialDecay/sub" + input: "PolynomialDecay/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay" + op: "Add" + input: "PolynomialDecay/Mul" + input: "PolynomialDecay/Cast/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "Cast" + op: "Cast" + input: "Cast/ReadVariableOp" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 15000 + } + } + } +} +node { + name: "Cast_1" + op: "Cast" + input: "Cast" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast_2" + op: "Cast" + input: "Const_2" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv" + op: "RealDiv" + input: "Cast_1" + input: "Cast_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 4.999999873689376e-05 + } + } + } +} +node { + name: "mul_1" + op: "Mul" + input: "mul_1/x" + input: "truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Less" + op: "Less" + input: "Cast" + input: "Const_2" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast_3" + op: "Cast" + input: "Less" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "sub" + op: "Sub" + input: "sub/x" + input: "Cast_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_2" + op: "Mul" + input: "sub" + input: "PolynomialDecay" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_3" + op: "Mul" + input: "Cast_3" + input: "mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "add_2" + op: "Add" + input: "mul_2" + input: "mul_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/grad_ys_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/Fill" + op: "Fill" + input: "gradients/Shape" + input: "gradients/grad_ys_0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/truediv_grad/Shape" + input: "gradients/cls/predictions/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/Fill" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/truediv_grad/RealDiv" + input: "gradients/cls/predictions/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/truediv_grad/Sum" + input: "gradients/cls/predictions/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Neg" + op: "Neg" + input: "cls/predictions/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/cls/predictions/truediv_grad/Neg" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/cls/predictions/truediv_grad/RealDiv_1" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/mul" + op: "Mul" + input: "gradients/Fill" + input: "gradients/cls/predictions/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/truediv_grad/mul" + input: "gradients/cls/predictions/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/truediv_grad/Sum_1" + input: "gradients/cls/predictions/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Reshape" + op: "Reshape" + input: "gradients/Fill" + input: "gradients/cls/seq_relationship/Mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 32 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Tile" + op: "Tile" + input: "gradients/cls/seq_relationship/Mean_grad/Reshape" + input: "gradients/cls/seq_relationship/Mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 32.0 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/truediv" + op: "RealDiv" + input: "gradients/cls/seq_relationship/Mean_grad/Tile" + input: "gradients/cls/seq_relationship/Mean_grad/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/truediv_grad/Reshape" + input: "gradients/cls/predictions/Sum_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 640 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/Sum_1_grad/Reshape" + input: "gradients/cls/predictions/Sum_1_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Neg_grad/Neg" + op: "Neg" + input: "gradients/cls/seq_relationship/Mean_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_1_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/Sum_1_grad/Tile" + input: "cls/predictions/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/cls/predictions/Sum_1_grad/Tile" + input: "cls/predictions/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: " \000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/add" + op: "Add" + input: "cls/seq_relationship/Sum/reduction_indices" + input: "gradients/cls/seq_relationship/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/mod" + op: "FloorMod" + input: "gradients/cls/seq_relationship/Sum_grad/add" + input: "gradients/cls/seq_relationship/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/range" + op: "Range" + input: "gradients/cls/seq_relationship/Sum_grad/range/start" + input: "gradients/cls/seq_relationship/Sum_grad/Size" + input: "gradients/cls/seq_relationship/Sum_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Fill" + op: "Fill" + input: "gradients/cls/seq_relationship/Sum_grad/Shape_1" + input: "gradients/cls/seq_relationship/Sum_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/cls/seq_relationship/Sum_grad/range" + input: "gradients/cls/seq_relationship/Sum_grad/mod" + input: "gradients/cls/seq_relationship/Sum_grad/Shape" + input: "gradients/cls/seq_relationship/Sum_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Maximum" + op: "Maximum" + input: "gradients/cls/seq_relationship/Sum_grad/DynamicStitch" + input: "gradients/cls/seq_relationship/Sum_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/floordiv" + op: "FloorDiv" + input: "gradients/cls/seq_relationship/Sum_grad/Shape" + input: "gradients/cls/seq_relationship/Sum_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Reshape" + op: "Reshape" + input: "gradients/cls/seq_relationship/Neg_grad/Neg" + input: "gradients/cls/seq_relationship/Sum_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Tile" + op: "Tile" + input: "gradients/cls/seq_relationship/Sum_grad/Reshape" + input: "gradients/cls/seq_relationship/Sum_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Neg_grad/Neg" + op: "Neg" + input: "gradients/cls/predictions/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/mul_grad/Mul" + op: "Mul" + input: "gradients/cls/seq_relationship/Sum_grad/Tile" + input: "cls/seq_relationship/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/mul_grad/Mul_1" + op: "Mul" + input: "gradients/cls/seq_relationship/Sum_grad/Tile" + input: "cls/seq_relationship/one_hot" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000Dq\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/add" + op: "Add" + input: "cls/predictions/Sum/reduction_indices" + input: "gradients/cls/predictions/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/mod" + op: "FloorMod" + input: "gradients/cls/predictions/Sum_grad/add" + input: "gradients/cls/predictions/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/range" + op: "Range" + input: "gradients/cls/predictions/Sum_grad/range/start" + input: "gradients/cls/predictions/Sum_grad/Size" + input: "gradients/cls/predictions/Sum_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Fill" + op: "Fill" + input: "gradients/cls/predictions/Sum_grad/Shape_1" + input: "gradients/cls/predictions/Sum_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/cls/predictions/Sum_grad/range" + input: "gradients/cls/predictions/Sum_grad/mod" + input: "gradients/cls/predictions/Sum_grad/Shape" + input: "gradients/cls/predictions/Sum_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Maximum" + op: "Maximum" + input: "gradients/cls/predictions/Sum_grad/DynamicStitch" + input: "gradients/cls/predictions/Sum_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/floordiv" + op: "FloorDiv" + input: "gradients/cls/predictions/Sum_grad/Shape" + input: "gradients/cls/predictions/Sum_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/Sum_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/Neg_grad/Neg" + input: "gradients/cls/predictions/Sum_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/Sum_grad/Reshape" + input: "gradients/cls/predictions/Sum_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/Exp" + op: "Exp" + input: "cls/seq_relationship/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum" + op: "Sum" + input: "gradients/cls/seq_relationship/mul_grad/Mul_1" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/mul" + op: "Mul" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + op: "Sub" + input: "gradients/cls/seq_relationship/mul_grad/Mul_1" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/Sum_grad/Tile" + input: "cls/predictions/one_hot" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_grad/Mul_1" + op: "Mul" + input: "gradients/cls/predictions/Sum_grad/Tile" + input: "cls/predictions/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/Exp" + op: "Exp" + input: "cls/predictions/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/mul_grad/Mul" + input: "gradients/cls/predictions/LogSoftmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/mul" + op: "Mul" + input: "gradients/cls/predictions/LogSoftmax_grad/Sum" + input: "gradients/cls/predictions/LogSoftmax_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/sub" + op: "Sub" + input: "gradients/cls/predictions/mul_grad/Mul" + input: "gradients/cls/predictions/LogSoftmax_grad/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + input: "cls/seq_relationship/output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + op: "MatMul" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + input: "bert/pooler/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/cls/predictions/LogSoftmax_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/pooler/dense/Tanh" + input: "gradients/cls/seq_relationship/MatMul_grad/MatMul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/cls/predictions/LogSoftmax_grad/sub" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/MatMul_grad/MatMul_1" + op: "MatMul" + input: "gradients/cls/predictions/LogSoftmax_grad/sub" + input: "cls/predictions/transform/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/pooler/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/pooler/Squeeze" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + input: "cls/predictions/transform/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/pooler/Squeeze_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/pooler/Squeeze_grad/Reshape" + op: "Reshape" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul" + input: "gradients/bert/pooler/Squeeze_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/moments/mean" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/pooler/strided_slice_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + op: "StridedSliceGrad" + input: "gradients/bert/pooler/strided_slice_grad/Shape" + input: "bert/pooler/strided_slice/stack" + input: "bert/pooler/strided_slice/stack_1" + input: "bert/pooler/strided_slice/stack_2" + input: "gradients/bert/pooler/Squeeze_grad/Reshape" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 5 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 5 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/AddN" + op: "AddN" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN" + input: "cls/predictions/transform/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "cls/predictions/transform/LayerNorm/moments/variance/reduction_indices" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/add" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/start" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/mod" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "cls/predictions/transform/dense/mul_1" + input: "cls/predictions/transform/LayerNorm/moments/StopGradient" + input: "^gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "cls/predictions/transform/LayerNorm/moments/mean/reduction_indices" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/add" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/range/start" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Size" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/range" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/mod" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_1" + op: "AddN" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_1" + input: "cls/predictions/transform/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_1" + input: "cls/predictions/transform/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/dense/mul_grad/Shape" + input: "gradients/cls/predictions/transform/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul_1" + input: "cls/predictions/transform/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_grad/Mul" + input: "gradients/cls/predictions/transform/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/mul_grad/Sum" + input: "gradients/cls/predictions/transform/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/mul/x" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_grad/Mul_1" + input: "gradients/cls/predictions/transform/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/mul_grad/Sum_1" + input: "gradients/cls/predictions/transform/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/dense/add_grad/Shape" + input: "gradients/cls/predictions/transform/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_grad/Reshape_1" + input: "gradients/cls/predictions/transform/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/add_grad/Sum" + input: "gradients/cls/predictions/transform/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_grad/Reshape_1" + input: "gradients/cls/predictions/transform/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/add_grad/Sum_1" + input: "gradients/cls/predictions/transform/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/add_grad/Reshape_1" + input: "gradients/cls/predictions/transform/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Erf_grad/Square" + op: "Square" + input: "cls/predictions/transform/dense/truediv" + input: "^gradients/cls/predictions/transform/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/cls/predictions/transform/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/cls/predictions/transform/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/Erf_grad/mul" + input: "gradients/cls/predictions/transform/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Shape" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/cls/predictions/transform/dense/Erf_grad/mul_1" + input: "cls/predictions/transform/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/truediv_grad/RealDiv" + input: "gradients/cls/predictions/transform/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Sum" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/Neg" + op: "Neg" + input: "cls/predictions/transform/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Neg" + input: "cls/predictions/transform/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/cls/predictions/transform/dense/truediv_grad/RealDiv_1" + input: "cls/predictions/transform/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/Erf_grad/mul_1" + input: "gradients/cls/predictions/transform/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/truediv_grad/mul" + input: "gradients/cls/predictions/transform/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Sum_1" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_2" + op: "AddN" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul" + input: "gradients/cls/predictions/transform/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_2" + input: "cls/predictions/transform/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "GatherV2" + input: "gradients/AddN_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/GatherV2_grad/Shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@Reshape_2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\000\000\000\000\003\000\000\000\000\000\000" + } + } + } +} +node { + name: "gradients/GatherV2_grad/ToInt32" + op: "Cast" + input: "gradients/GatherV2_grad/Shape" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape_2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 640 + } + } + } +} +node { + name: "gradients/GatherV2_grad/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/GatherV2_grad/ExpandDims" + op: "ExpandDims" + input: "gradients/GatherV2_grad/Size" + input: "gradients/GatherV2_grad/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice" + op: "StridedSlice" + input: "gradients/GatherV2_grad/ToInt32" + input: "gradients/GatherV2_grad/strided_slice/stack" + input: "gradients/GatherV2_grad/strided_slice/stack_1" + input: "gradients/GatherV2_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/GatherV2_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/GatherV2_grad/concat" + op: "ConcatV2" + input: "gradients/GatherV2_grad/ExpandDims" + input: "gradients/GatherV2_grad/strided_slice" + input: "gradients/GatherV2_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul" + input: "gradients/GatherV2_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/Reshape_1" + op: "Reshape" + input: "Reshape_1" + input: "gradients/GatherV2_grad/ExpandDims" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 640 + } + } + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice" + op: "StridedSlice" + input: "gradients/GatherV2_grad/ToInt32" + input: "gradients/Reshape_2_grad/Reshape/strided_slice/stack" + input: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_1" + input: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/tensor" + op: "UnsortedSegmentSum" + input: "gradients/GatherV2_grad/Reshape" + input: "gradients/GatherV2_grad/Reshape_1" + input: "gradients/Reshape_2_grad/Reshape/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/Reshape_2_grad/Reshape/tensor" + input: "gradients/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_3" + op: "AddN" + input: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + input: "gradients/Reshape_2_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/Reshape_13_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/Reshape_13_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_3" + input: "gradients/bert/encoder/Reshape_13_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "bert/encoder/layer_11/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/Reshape_13_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_4" + op: "AddN" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_4" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_11/output/add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_11/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_11/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_5" + op: "AddN" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_5" + input: "bert/encoder/layer_11/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_5" + input: "bert/encoder/layer_11/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_11/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_11/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_11/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_11/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_11/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_11/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_11/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_11/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_11/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_11/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_11/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_11/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_11/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_11/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_11/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_6" + op: "AddN" + input: "gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_6" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_7" + op: "AddN" + input: "gradients/AddN_5" + input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_7" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_7" + input: "bert/encoder/layer_11/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_7" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_7" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_8" + op: "AddN" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_8" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_11/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_9" + op: "AddN" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_9" + input: "bert/encoder/layer_11/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_9" + input: "bert/encoder/layer_11/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_11/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_11/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_11/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_11/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_11/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_11/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_11/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_11/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_11/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_11/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_11/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_11/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_11/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_11/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_11/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_11/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_11/attention/self/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_11/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_11/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_11/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_11/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_10" + op: "AddN" + input: "gradients/AddN_9" + input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_10" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_10" + input: "bert/encoder/layer_10/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_10" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_10" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_11" + op: "AddN" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_11" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_10/output/add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_10/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_10/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_12" + op: "AddN" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_12" + input: "bert/encoder/layer_10/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_12" + input: "bert/encoder/layer_10/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_10/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_10/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_10/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_10/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_10/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_10/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_10/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_10/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_10/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_10/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_10/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_10/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_10/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_10/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_13" + op: "AddN" + input: "gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_13" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_14" + op: "AddN" + input: "gradients/AddN_12" + input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_14" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_14" + input: "bert/encoder/layer_10/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_14" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_14" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_15" + op: "AddN" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_15" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_10/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_16" + op: "AddN" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_16" + input: "bert/encoder/layer_10/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_16" + input: "bert/encoder/layer_10/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_10/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_10/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_10/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_10/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_10/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_10/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_10/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_10/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_10/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_10/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_10/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_10/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_10/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_10/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_10/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_10/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_10/attention/self/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_10/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_10/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_10/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_10/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_17" + op: "AddN" + input: "gradients/AddN_16" + input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_17" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_17" + input: "bert/encoder/layer_9/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_17" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_17" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_18" + op: "AddN" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_18" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_9/output/add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_9/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_9/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_19" + op: "AddN" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_19" + input: "bert/encoder/layer_9/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_19" + input: "bert/encoder/layer_9/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_9/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_9/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_9/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_9/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_9/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_9/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_9/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_9/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_9/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_9/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_9/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_9/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_9/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_9/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_20" + op: "AddN" + input: "gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_20" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_21" + op: "AddN" + input: "gradients/AddN_19" + input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_21" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_21" + input: "bert/encoder/layer_9/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_21" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_21" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_22" + op: "AddN" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_22" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_9/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_23" + op: "AddN" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_23" + input: "bert/encoder/layer_9/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_23" + input: "bert/encoder/layer_9/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_9/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_9/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_9/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_9/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_9/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_9/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_9/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_9/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_9/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_9/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_9/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_9/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_9/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_9/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_9/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_9/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_9/attention/self/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_9/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_9/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_9/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_9/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_24" + op: "AddN" + input: "gradients/AddN_23" + input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_24" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_24" + input: "bert/encoder/layer_8/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_24" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_24" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_25" + op: "AddN" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_25" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_8/output/add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_8/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_8/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_26" + op: "AddN" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_26" + input: "bert/encoder/layer_8/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_26" + input: "bert/encoder/layer_8/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_8/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_8/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_8/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_8/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_8/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_8/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_8/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_8/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_8/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_8/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_8/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_8/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_8/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_8/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_27" + op: "AddN" + input: "gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_27" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_28" + op: "AddN" + input: "gradients/AddN_26" + input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_28" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_28" + input: "bert/encoder/layer_8/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_28" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_28" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_29" + op: "AddN" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_29" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_8/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_30" + op: "AddN" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_30" + input: "bert/encoder/layer_8/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_30" + input: "bert/encoder/layer_8/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_8/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_8/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_8/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_8/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_8/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_8/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_8/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_8/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_8/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_8/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_8/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_8/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_8/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_8/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_8/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_8/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_8/attention/self/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_8/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_8/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_8/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_8/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_31" + op: "AddN" + input: "gradients/AddN_30" + input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_31" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_31" + input: "bert/encoder/layer_7/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_31" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_31" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_32" + op: "AddN" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_32" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_7/output/add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_7/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_7/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_33" + op: "AddN" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_33" + input: "bert/encoder/layer_7/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_33" + input: "bert/encoder/layer_7/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_7/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_7/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_7/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_7/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_7/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_7/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_7/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_7/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_7/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_7/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_7/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_7/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_7/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_7/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_34" + op: "AddN" + input: "gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_34" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_35" + op: "AddN" + input: "gradients/AddN_33" + input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_35" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_35" + input: "bert/encoder/layer_7/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_35" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_35" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_36" + op: "AddN" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_36" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_7/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_37" + op: "AddN" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_37" + input: "bert/encoder/layer_7/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_37" + input: "bert/encoder/layer_7/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_7/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_7/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_7/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_7/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_7/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_7/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_7/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_7/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_7/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_7/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_7/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_7/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_7/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_7/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_7/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_7/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_7/attention/self/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_7/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_7/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_7/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_7/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_38" + op: "AddN" + input: "gradients/AddN_37" + input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_38" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_38" + input: "bert/encoder/layer_6/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_38" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_38" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_39" + op: "AddN" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_39" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_6/output/add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_6/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_6/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_40" + op: "AddN" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_40" + input: "bert/encoder/layer_6/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_40" + input: "bert/encoder/layer_6/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_6/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_6/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_6/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_6/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_6/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_6/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_6/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_6/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_6/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_6/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_6/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_6/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_6/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_6/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_41" + op: "AddN" + input: "gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_41" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_42" + op: "AddN" + input: "gradients/AddN_40" + input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_42" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_42" + input: "bert/encoder/layer_6/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_42" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_42" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_43" + op: "AddN" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_43" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_6/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_44" + op: "AddN" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_44" + input: "bert/encoder/layer_6/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_44" + input: "bert/encoder/layer_6/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_6/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_6/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_6/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_6/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_6/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_6/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_6/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_6/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_6/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_6/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_6/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_6/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_6/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_6/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_6/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_6/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_6/attention/self/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_6/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_6/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_6/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_6/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_45" + op: "AddN" + input: "gradients/AddN_44" + input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_45" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_45" + input: "bert/encoder/layer_5/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_45" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_45" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_46" + op: "AddN" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_46" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_5/output/add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_5/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_5/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_47" + op: "AddN" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_47" + input: "bert/encoder/layer_5/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_47" + input: "bert/encoder/layer_5/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_5/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_5/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_5/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_5/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_5/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_5/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_5/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_5/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_5/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_5/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_5/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_5/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_5/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_5/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_48" + op: "AddN" + input: "gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_48" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_49" + op: "AddN" + input: "gradients/AddN_47" + input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_49" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_49" + input: "bert/encoder/layer_5/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_49" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_49" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_50" + op: "AddN" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_50" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_5/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_51" + op: "AddN" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_51" + input: "bert/encoder/layer_5/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_51" + input: "bert/encoder/layer_5/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_5/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_5/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_5/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_5/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_5/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_5/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_5/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_5/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_5/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_5/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_5/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_5/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_5/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_5/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_5/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_5/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_5/attention/self/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_5/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_5/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_5/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_5/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_52" + op: "AddN" + input: "gradients/AddN_51" + input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_52" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_52" + input: "bert/encoder/layer_4/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_52" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_52" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_53" + op: "AddN" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_53" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_4/output/add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_4/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_4/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_54" + op: "AddN" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_54" + input: "bert/encoder/layer_4/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_54" + input: "bert/encoder/layer_4/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_4/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_4/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_4/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_4/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_4/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_4/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_4/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_4/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_4/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_4/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_4/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_4/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_4/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_4/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_55" + op: "AddN" + input: "gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_55" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_56" + op: "AddN" + input: "gradients/AddN_54" + input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_56" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_56" + input: "bert/encoder/layer_4/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_56" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_56" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_57" + op: "AddN" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_57" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_4/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_58" + op: "AddN" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_58" + input: "bert/encoder/layer_4/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_58" + input: "bert/encoder/layer_4/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_4/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_4/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_4/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_4/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_4/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_4/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_4/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_4/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_4/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_4/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_4/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_4/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_4/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_4/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_4/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_4/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_4/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_4/attention/self/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_4/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_4/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_4/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_4/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_4/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_59" + op: "AddN" + input: "gradients/AddN_58" + input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_59" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_59" + input: "bert/encoder/layer_3/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_59" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_59" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_60" + op: "AddN" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_60" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_3/output/add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_3/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_3/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_61" + op: "AddN" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_61" + input: "bert/encoder/layer_3/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_61" + input: "bert/encoder/layer_3/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_3/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_3/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_3/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_3/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_3/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_3/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_3/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_3/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_3/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_3/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_3/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_3/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_3/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_3/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_62" + op: "AddN" + input: "gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_62" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_63" + op: "AddN" + input: "gradients/AddN_61" + input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_63" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_63" + input: "bert/encoder/layer_3/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_63" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_63" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_64" + op: "AddN" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_64" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_3/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_65" + op: "AddN" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_65" + input: "bert/encoder/layer_3/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_65" + input: "bert/encoder/layer_3/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_3/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_3/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_3/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_3/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_3/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_3/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_3/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_3/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_3/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_3/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_3/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_3/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_3/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_3/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_3/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_3/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_3/attention/self/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_3/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_3/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_3/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_3/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_66" + op: "AddN" + input: "gradients/AddN_65" + input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_66" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_66" + input: "bert/encoder/layer_2/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_66" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_66" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_67" + op: "AddN" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_67" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_2/output/add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_2/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_2/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_68" + op: "AddN" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_68" + input: "bert/encoder/layer_2/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_68" + input: "bert/encoder/layer_2/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_2/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_2/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_2/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_2/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_2/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_2/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_2/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_2/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_2/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_2/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_2/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_2/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_2/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_2/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_69" + op: "AddN" + input: "gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_69" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_70" + op: "AddN" + input: "gradients/AddN_68" + input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_70" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_70" + input: "bert/encoder/layer_2/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_70" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_70" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_71" + op: "AddN" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_71" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_2/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_72" + op: "AddN" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_72" + input: "bert/encoder/layer_2/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_72" + input: "bert/encoder/layer_2/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_2/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_2/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_2/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_2/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_2/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_2/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_2/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_2/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_2/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_2/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_2/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_2/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_2/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_2/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_2/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_2/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_2/attention/self/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_2/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_2/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_2/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_2/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_73" + op: "AddN" + input: "gradients/AddN_72" + input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_73" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_73" + input: "bert/encoder/layer_1/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_73" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_73" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_74" + op: "AddN" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_74" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_1/output/add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_1/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_1/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_75" + op: "AddN" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_75" + input: "bert/encoder/layer_1/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_75" + input: "bert/encoder/layer_1/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_1/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_1/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_1/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_1/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_1/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_1/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_1/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_1/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_1/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_1/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_1/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_1/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_76" + op: "AddN" + input: "gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_76" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_77" + op: "AddN" + input: "gradients/AddN_75" + input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_77" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_77" + input: "bert/encoder/layer_1/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_77" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_77" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_78" + op: "AddN" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_78" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_1/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_79" + op: "AddN" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_79" + input: "bert/encoder/layer_1/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_79" + input: "bert/encoder/layer_1/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_1/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_1/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_1/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_1/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_1/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_1/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_1/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_1/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_1/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_1/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_1/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_1/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_1/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_1/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_1/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_1/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_1/attention/self/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_1/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_1/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_1/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/add_1" + input: "gradients/bert/encoder/layer_1/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_80" + op: "AddN" + input: "gradients/AddN_79" + input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_80" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_80" + input: "bert/encoder/layer_0/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_80" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_80" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_81" + op: "AddN" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_81" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_0/output/add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_0/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_0/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_82" + op: "AddN" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_82" + input: "bert/encoder/layer_0/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_82" + input: "bert/encoder/layer_0/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_0/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_0/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_0/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_0/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_0/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/intermediate/dense/mul_1" + input: "gradients/bert/encoder/layer_0/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_0/intermediate/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul_1" + input: "bert/encoder/layer_0/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/intermediate/dense/mul/x" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_grad/Reshape_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.128379225730896 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Reshape_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Square" + op: "Square" + input: "bert/encoder/layer_0/intermediate/dense/truediv" + input: "^gradients/bert/encoder/layer_0/intermediate/dense/add_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Exp" + op: "Exp" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Shape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/mul_1" + input: "bert/encoder/layer_0/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/RealDiv" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_0/intermediate/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Neg" + input: "bert/encoder/layer_0/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/RealDiv_1" + input: "bert/encoder/layer_0/intermediate/dense/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/Erf_grad/mul_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_83" + op: "AddN" + input: "gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/truediv_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_83" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_1" + input: "gradients/AddN_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_84" + op: "AddN" + input: "gradients/AddN_82" + input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_84" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_84" + input: "bert/encoder/layer_0/attention/output/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_84" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_84" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_85" + op: "AddN" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_85" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/encoder/layer_0/attention/output/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_86" + op: "AddN" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_86" + input: "bert/encoder/layer_0/attention/output/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_86" + input: "bert/encoder/layer_0/attention/output/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul" + input: "bert/encoder/layer_0/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_0/attention/output/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Neg" + input: "bert/encoder/layer_0/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_0/attention/output/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Reshape" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/layer_0/attention/self/Reshape_3" + input: "gradients/bert/encoder/layer_0/attention/output/dropout/div_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_3_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose" + input: "bert/encoder/layer_0/attention/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul_1" + op: "BatchMatMul" + input: "bert/encoder/layer_0/attention/self/dropout/mul" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_0/attention/self/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul" + input: "bert/encoder/layer_0/attention/self/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_1_grad/MatMul_1" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul" + input: "bert/encoder/layer_0/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/RealDiv" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Neg" + op: "Neg" + input: "bert/encoder/layer_0/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Neg" + input: "bert/encoder/layer_0/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/RealDiv_1" + input: "bert/encoder/layer_0/attention/self/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/mul" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/layer_0/attention/self/dropout/div_grad/Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/sub" + input: "bert/encoder/layer_0/attention/self/Softmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\001\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Softmax_grad/mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\014\000\000\000\200\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/layer_0/attention/self/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Sum_1" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul_1" + op: "BatchMatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Mul_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 12 + } + dim { + size: 128 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/layer_0/attention/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/layer_0/attention/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_grad/transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/layer_0/attention/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + input: "bert/encoder/layer_0/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/Reshape_1" + input: "gradients/bert/encoder/layer_0/attention/self/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_87" + op: "AddN" + input: "gradients/AddN_86" + input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul" + input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_87" + input: "gradients/bert/encoder/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/Reshape_1_grad/Reshape" + input: "bert/embeddings/dropout/Floor" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/mul_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/Reshape_1_grad/Reshape" + input: "bert/embeddings/dropout/div" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/dropout/div_grad/Shape" + input: "gradients/bert/embeddings/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/RealDiv" + op: "RealDiv" + input: "gradients/bert/embeddings/dropout/mul_grad/Mul" + input: "bert/embeddings/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/dropout/div_grad/RealDiv" + input: "gradients/bert/embeddings/dropout/div_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/dropout/div_grad/Sum" + input: "gradients/bert/embeddings/dropout/div_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/Neg" + op: "Neg" + input: "bert/embeddings/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/bert/embeddings/dropout/div_grad/Neg" + input: "bert/embeddings/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/bert/embeddings/dropout/div_grad/RealDiv_1" + input: "bert/embeddings/dropout/keep_prob" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/mul" + op: "Mul" + input: "gradients/bert/embeddings/dropout/mul_grad/Mul" + input: "gradients/bert/embeddings/dropout/div_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/dropout/div_grad/mul" + input: "gradients/bert/embeddings/dropout/div_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/dropout/div_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/dropout/div_grad/Sum_1" + input: "gradients/bert/embeddings/dropout/div_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/dropout/div_grad/Reshape" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/embeddings/dropout/div_grad/Reshape" + input: "bert/embeddings/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/dropout/div_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/dropout/div_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/embeddings/LayerNorm/moments/mean" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_88" + op: "AddN" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_88" + input: "bert/embeddings/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add" + op: "Add" + input: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape_1" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/StopGradient" + input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/add" + op: "Add" + input: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/mod" + op: "FloorMod" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/add" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape_1" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/start" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/delta" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range" + op: "Range" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/start" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Size" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill/value" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill" + op: "Fill" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape_1" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/range" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/mod" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum/y" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum" + op: "Maximum" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/moments/mean_grad/Shape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/DynamicStitch" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/floordiv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_89" + op: "AddN" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: " \000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/add_1_grad/Shape" + input: "gradients/bert/embeddings/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Sum" + op: "Sum" + input: "gradients/AddN_89" + input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Sum" + input: "gradients/bert/embeddings/add_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Sum_1" + op: "Sum" + input: "gradients/AddN_89" + input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Sum_1" + input: "gradients/bert/embeddings/add_1_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_3_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_3_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Reshape_1" + input: "gradients/bert/embeddings/Reshape_3_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: " \000\000\000\200\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Reshape" + input: "gradients/bert/embeddings/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + dim { + size: 128 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\020\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Reshape" + input: "gradients/bert/embeddings/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Rank" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/stack/1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/stack" + op: "Pack" + input: "gradients/bert/embeddings/Slice_grad/Rank" + input: "gradients/bert/embeddings/Slice_grad/stack/1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Reshape" + op: "Reshape" + input: "bert/embeddings/Slice/begin" + input: "gradients/bert/embeddings/Slice_grad/stack" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/sub" + op: "Sub" + input: "gradients/bert/embeddings/Slice_grad/Shape_1" + input: "gradients/bert/embeddings/Slice_grad/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/sub_1" + op: "Sub" + input: "gradients/bert/embeddings/Slice_grad/sub" + input: "bert/embeddings/Slice/begin" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/Slice_grad/sub_1" + input: "gradients/bert/embeddings/Slice_grad/stack" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/concat" + op: "ConcatV2" + input: "gradients/bert/embeddings/Slice_grad/Reshape" + input: "gradients/bert/embeddings/Slice_grad/Reshape_1" + input: "gradients/bert/embeddings/Slice_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Pad" + op: "Pad" + input: "gradients/bert/embeddings/Reshape_3_grad/Reshape" + input: "gradients/bert/embeddings/Slice_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tpaddings" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/embeddings/Reshape_2_grad/Reshape" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/embeddings/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/embeddings/one_hot" + input: "gradients/bert/embeddings/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "Dq\000\000\000\000\000\000\000\003\000\000\000\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/ToInt32" + op: "Cast" + input: "gradients/bert/embeddings/embedding_lookup_grad/Shape" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 4096 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims" + op: "ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_grad/Size" + input: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/embedding_lookup_grad/ToInt32" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_1" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/concat" + op: "ConcatV2" + input: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice" + input: "gradients/bert/embeddings/embedding_lookup_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/Reshape_grad/Reshape" + input: "gradients/bert/embeddings/embedding_lookup_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Reshape_1" + op: "Reshape" + input: "bert/embeddings/ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4096 + } + } + } + } + } +} +node { + name: "gradients/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "Dq\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/strided_slice" + op: "StridedSlice" + input: "gradients/Shape_1" + input: "gradients/strided_slice/stack" + input: "gradients/strided_slice/stack_1" + input: "gradients/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "gradients/range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/range" + op: "Range" + input: "gradients/range/start" + input: "gradients/strided_slice" + input: "gradients/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "gradients/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/concat" + op: "ConcatV2" + input: "gradients/cls/predictions/MatMul_grad/MatMul_1" + input: "gradients/bert/embeddings/embedding_lookup_grad/Reshape" + input: "gradients/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 33092 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/concat_1/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/concat_1" + op: "ConcatV2" + input: "gradients/range" + input: "gradients/bert/embeddings/embedding_lookup_grad/Reshape_1" + input: "gradients/concat_1/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 33092 + } + } + } + } + } +} +node { + name: "global_norm/L2Loss" + op: "L2Loss" + input: "gradients/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/concat" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_1" + op: "L2Loss" + input: "gradients/bert/embeddings/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_2" + op: "L2Loss" + input: "gradients/bert/embeddings/Slice_grad/Pad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_3" + op: "L2Loss" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_4" + op: "L2Loss" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_5" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_6" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_7" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_8" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_9" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_10" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_11" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_12" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_13" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_14" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_15" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_16" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_17" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_18" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_19" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_20" + op: "L2Loss" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_21" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_22" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_23" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_24" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_25" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_26" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_27" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_28" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_29" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_30" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_31" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_32" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_33" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_34" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_35" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_36" + op: "L2Loss" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_37" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_38" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_39" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_40" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_41" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_42" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_43" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_44" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_45" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_46" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_47" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_48" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_49" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_50" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_51" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_52" + op: "L2Loss" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_53" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_54" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_55" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_56" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_57" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_58" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_59" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_60" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_61" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_62" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_63" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_64" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_65" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_66" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_67" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_68" + op: "L2Loss" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_69" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_70" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_71" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_72" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_73" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_74" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_75" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_76" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_77" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_78" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_79" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_80" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_81" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_82" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_83" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_84" + op: "L2Loss" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_85" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_86" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_87" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_88" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_89" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_90" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_91" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_92" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_93" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_94" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_95" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_96" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_97" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_98" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_99" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_100" + op: "L2Loss" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_101" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_102" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_103" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_104" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_105" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_106" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_107" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_108" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_109" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_110" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_111" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_112" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_113" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_114" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_115" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_116" + op: "L2Loss" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_117" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_118" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_119" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_120" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_121" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_122" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_123" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_124" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_125" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_126" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_127" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_128" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_129" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_130" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_131" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_132" + op: "L2Loss" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_133" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_134" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_135" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_136" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_137" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_138" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_139" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_140" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_141" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_142" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_143" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_144" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_145" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_146" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_147" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_148" + op: "L2Loss" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_149" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_150" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_151" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_152" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_153" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_154" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_155" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_156" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_157" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_158" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_159" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_160" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_161" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_162" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_163" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_164" + op: "L2Loss" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_165" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_166" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_167" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_168" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_169" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_170" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_171" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_172" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_173" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_174" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_175" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_176" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_177" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_178" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_179" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_180" + op: "L2Loss" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_181" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_182" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_183" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_184" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_185" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_186" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_187" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_188" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_189" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_190" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_191" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_192" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_193" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_194" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_195" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_196" + op: "L2Loss" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_197" + op: "L2Loss" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_198" + op: "L2Loss" + input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_199" + op: "L2Loss" + input: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_200" + op: "L2Loss" + input: "gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_201" + op: "L2Loss" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_202" + op: "L2Loss" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_203" + op: "L2Loss" + input: "gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_204" + op: "L2Loss" + input: "gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_205" + op: "L2Loss" + input: "gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/stack" + op: "Pack" + input: "global_norm/L2Loss" + input: "global_norm/L2Loss_1" + input: "global_norm/L2Loss_2" + input: "global_norm/L2Loss_3" + input: "global_norm/L2Loss_4" + input: "global_norm/L2Loss_5" + input: "global_norm/L2Loss_6" + input: "global_norm/L2Loss_7" + input: "global_norm/L2Loss_8" + input: "global_norm/L2Loss_9" + input: "global_norm/L2Loss_10" + input: "global_norm/L2Loss_11" + input: "global_norm/L2Loss_12" + input: "global_norm/L2Loss_13" + input: "global_norm/L2Loss_14" + input: "global_norm/L2Loss_15" + input: "global_norm/L2Loss_16" + input: "global_norm/L2Loss_17" + input: "global_norm/L2Loss_18" + input: "global_norm/L2Loss_19" + input: "global_norm/L2Loss_20" + input: "global_norm/L2Loss_21" + input: "global_norm/L2Loss_22" + input: "global_norm/L2Loss_23" + input: "global_norm/L2Loss_24" + input: "global_norm/L2Loss_25" + input: "global_norm/L2Loss_26" + input: "global_norm/L2Loss_27" + input: "global_norm/L2Loss_28" + input: "global_norm/L2Loss_29" + input: "global_norm/L2Loss_30" + input: "global_norm/L2Loss_31" + input: "global_norm/L2Loss_32" + input: "global_norm/L2Loss_33" + input: "global_norm/L2Loss_34" + input: "global_norm/L2Loss_35" + input: "global_norm/L2Loss_36" + input: "global_norm/L2Loss_37" + input: "global_norm/L2Loss_38" + input: "global_norm/L2Loss_39" + input: "global_norm/L2Loss_40" + input: "global_norm/L2Loss_41" + input: "global_norm/L2Loss_42" + input: "global_norm/L2Loss_43" + input: "global_norm/L2Loss_44" + input: "global_norm/L2Loss_45" + input: "global_norm/L2Loss_46" + input: "global_norm/L2Loss_47" + input: "global_norm/L2Loss_48" + input: "global_norm/L2Loss_49" + input: "global_norm/L2Loss_50" + input: "global_norm/L2Loss_51" + input: "global_norm/L2Loss_52" + input: "global_norm/L2Loss_53" + input: "global_norm/L2Loss_54" + input: "global_norm/L2Loss_55" + input: "global_norm/L2Loss_56" + input: "global_norm/L2Loss_57" + input: "global_norm/L2Loss_58" + input: "global_norm/L2Loss_59" + input: "global_norm/L2Loss_60" + input: "global_norm/L2Loss_61" + input: "global_norm/L2Loss_62" + input: "global_norm/L2Loss_63" + input: "global_norm/L2Loss_64" + input: "global_norm/L2Loss_65" + input: "global_norm/L2Loss_66" + input: "global_norm/L2Loss_67" + input: "global_norm/L2Loss_68" + input: "global_norm/L2Loss_69" + input: "global_norm/L2Loss_70" + input: "global_norm/L2Loss_71" + input: "global_norm/L2Loss_72" + input: "global_norm/L2Loss_73" + input: "global_norm/L2Loss_74" + input: "global_norm/L2Loss_75" + input: "global_norm/L2Loss_76" + input: "global_norm/L2Loss_77" + input: "global_norm/L2Loss_78" + input: "global_norm/L2Loss_79" + input: "global_norm/L2Loss_80" + input: "global_norm/L2Loss_81" + input: "global_norm/L2Loss_82" + input: "global_norm/L2Loss_83" + input: "global_norm/L2Loss_84" + input: "global_norm/L2Loss_85" + input: "global_norm/L2Loss_86" + input: "global_norm/L2Loss_87" + input: "global_norm/L2Loss_88" + input: "global_norm/L2Loss_89" + input: "global_norm/L2Loss_90" + input: "global_norm/L2Loss_91" + input: "global_norm/L2Loss_92" + input: "global_norm/L2Loss_93" + input: "global_norm/L2Loss_94" + input: "global_norm/L2Loss_95" + input: "global_norm/L2Loss_96" + input: "global_norm/L2Loss_97" + input: "global_norm/L2Loss_98" + input: "global_norm/L2Loss_99" + input: "global_norm/L2Loss_100" + input: "global_norm/L2Loss_101" + input: "global_norm/L2Loss_102" + input: "global_norm/L2Loss_103" + input: "global_norm/L2Loss_104" + input: "global_norm/L2Loss_105" + input: "global_norm/L2Loss_106" + input: "global_norm/L2Loss_107" + input: "global_norm/L2Loss_108" + input: "global_norm/L2Loss_109" + input: "global_norm/L2Loss_110" + input: "global_norm/L2Loss_111" + input: "global_norm/L2Loss_112" + input: "global_norm/L2Loss_113" + input: "global_norm/L2Loss_114" + input: "global_norm/L2Loss_115" + input: "global_norm/L2Loss_116" + input: "global_norm/L2Loss_117" + input: "global_norm/L2Loss_118" + input: "global_norm/L2Loss_119" + input: "global_norm/L2Loss_120" + input: "global_norm/L2Loss_121" + input: "global_norm/L2Loss_122" + input: "global_norm/L2Loss_123" + input: "global_norm/L2Loss_124" + input: "global_norm/L2Loss_125" + input: "global_norm/L2Loss_126" + input: "global_norm/L2Loss_127" + input: "global_norm/L2Loss_128" + input: "global_norm/L2Loss_129" + input: "global_norm/L2Loss_130" + input: "global_norm/L2Loss_131" + input: "global_norm/L2Loss_132" + input: "global_norm/L2Loss_133" + input: "global_norm/L2Loss_134" + input: "global_norm/L2Loss_135" + input: "global_norm/L2Loss_136" + input: "global_norm/L2Loss_137" + input: "global_norm/L2Loss_138" + input: "global_norm/L2Loss_139" + input: "global_norm/L2Loss_140" + input: "global_norm/L2Loss_141" + input: "global_norm/L2Loss_142" + input: "global_norm/L2Loss_143" + input: "global_norm/L2Loss_144" + input: "global_norm/L2Loss_145" + input: "global_norm/L2Loss_146" + input: "global_norm/L2Loss_147" + input: "global_norm/L2Loss_148" + input: "global_norm/L2Loss_149" + input: "global_norm/L2Loss_150" + input: "global_norm/L2Loss_151" + input: "global_norm/L2Loss_152" + input: "global_norm/L2Loss_153" + input: "global_norm/L2Loss_154" + input: "global_norm/L2Loss_155" + input: "global_norm/L2Loss_156" + input: "global_norm/L2Loss_157" + input: "global_norm/L2Loss_158" + input: "global_norm/L2Loss_159" + input: "global_norm/L2Loss_160" + input: "global_norm/L2Loss_161" + input: "global_norm/L2Loss_162" + input: "global_norm/L2Loss_163" + input: "global_norm/L2Loss_164" + input: "global_norm/L2Loss_165" + input: "global_norm/L2Loss_166" + input: "global_norm/L2Loss_167" + input: "global_norm/L2Loss_168" + input: "global_norm/L2Loss_169" + input: "global_norm/L2Loss_170" + input: "global_norm/L2Loss_171" + input: "global_norm/L2Loss_172" + input: "global_norm/L2Loss_173" + input: "global_norm/L2Loss_174" + input: "global_norm/L2Loss_175" + input: "global_norm/L2Loss_176" + input: "global_norm/L2Loss_177" + input: "global_norm/L2Loss_178" + input: "global_norm/L2Loss_179" + input: "global_norm/L2Loss_180" + input: "global_norm/L2Loss_181" + input: "global_norm/L2Loss_182" + input: "global_norm/L2Loss_183" + input: "global_norm/L2Loss_184" + input: "global_norm/L2Loss_185" + input: "global_norm/L2Loss_186" + input: "global_norm/L2Loss_187" + input: "global_norm/L2Loss_188" + input: "global_norm/L2Loss_189" + input: "global_norm/L2Loss_190" + input: "global_norm/L2Loss_191" + input: "global_norm/L2Loss_192" + input: "global_norm/L2Loss_193" + input: "global_norm/L2Loss_194" + input: "global_norm/L2Loss_195" + input: "global_norm/L2Loss_196" + input: "global_norm/L2Loss_197" + input: "global_norm/L2Loss_198" + input: "global_norm/L2Loss_199" + input: "global_norm/L2Loss_200" + input: "global_norm/L2Loss_201" + input: "global_norm/L2Loss_202" + input: "global_norm/L2Loss_203" + input: "global_norm/L2Loss_204" + input: "global_norm/L2Loss_205" + attr { + key: "N" + value { + i: 206 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 206 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "global_norm/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "global_norm/Sum" + op: "Sum" + input: "global_norm/stack" + input: "global_norm/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "global_norm/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "global_norm/mul" + op: "Mul" + input: "global_norm/Sum" + input: "global_norm/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/global_norm" + op: "Sqrt" + input: "global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "VerifyFinite/CheckNumerics" + op: "CheckNumerics" + input: "global_norm/global_norm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_norm/global_norm" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "message" + value { + s: "Found Inf or NaN global norm." + } + } +} +node { + name: "VerifyFinite/control_dependency" + op: "Identity" + input: "global_norm/global_norm" + input: "^VerifyFinite/CheckNumerics" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_norm/global_norm" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv" + op: "RealDiv" + input: "clip_by_global_norm/truediv/x" + input: "VerifyFinite/control_dependency" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv_1" + op: "RealDiv" + input: "clip_by_global_norm/Const" + input: "clip_by_global_norm/truediv_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Minimum" + op: "Minimum" + input: "clip_by_global_norm/truediv" + input: "clip_by_global_norm/truediv_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/mul" + op: "Mul" + input: "clip_by_global_norm/mul/x" + input: "clip_by_global_norm/Minimum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_1" + op: "Mul" + input: "gradients/concat" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/concat" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 33092 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_0" + op: "Identity" + input: "clip_by_global_norm/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/concat" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 33092 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_2" + op: "Mul" + input: "gradients/bert/embeddings/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_1" + op: "Identity" + input: "clip_by_global_norm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_3" + op: "Mul" + input: "gradients/bert/embeddings/Slice_grad/Pad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_2" + op: "Identity" + input: "clip_by_global_norm/mul_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_4" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_3" + op: "Identity" + input: "clip_by_global_norm/mul_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_5" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_4" + op: "Identity" + input: "clip_by_global_norm/mul_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_6" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_5" + op: "Identity" + input: "clip_by_global_norm/mul_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_7" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_6" + op: "Identity" + input: "clip_by_global_norm/mul_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_8" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_7" + op: "Identity" + input: "clip_by_global_norm/mul_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_9" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_8" + op: "Identity" + input: "clip_by_global_norm/mul_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_10" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_9" + op: "Identity" + input: "clip_by_global_norm/mul_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_11" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_10" + op: "Identity" + input: "clip_by_global_norm/mul_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_12" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_11" + op: "Identity" + input: "clip_by_global_norm/mul_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_13" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_12" + op: "Identity" + input: "clip_by_global_norm/mul_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_14" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_13" + op: "Identity" + input: "clip_by_global_norm/mul_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_15" + op: "Mul" + input: "gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_14" + op: "Identity" + input: "clip_by_global_norm/mul_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_16" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_15" + op: "Identity" + input: "clip_by_global_norm/mul_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_17" + op: "Mul" + input: "gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_16" + op: "Identity" + input: "clip_by_global_norm/mul_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_18" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_17" + op: "Identity" + input: "clip_by_global_norm/mul_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_19" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_18" + op: "Identity" + input: "clip_by_global_norm/mul_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_20" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_19" + op: "Identity" + input: "clip_by_global_norm/mul_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_21" + op: "Mul" + input: "gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_20" + op: "Identity" + input: "clip_by_global_norm/mul_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_0/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_22" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_21" + op: "Identity" + input: "clip_by_global_norm/mul_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_23" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_22" + op: "Identity" + input: "clip_by_global_norm/mul_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_24" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_23" + op: "Identity" + input: "clip_by_global_norm/mul_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_25" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_24" + op: "Identity" + input: "clip_by_global_norm/mul_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_26" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_25" + op: "Identity" + input: "clip_by_global_norm/mul_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_27" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_26" + op: "Identity" + input: "clip_by_global_norm/mul_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_28" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_27" + op: "Identity" + input: "clip_by_global_norm/mul_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_29" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_28" + op: "Identity" + input: "clip_by_global_norm/mul_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_30" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_29" + op: "Identity" + input: "clip_by_global_norm/mul_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_31" + op: "Mul" + input: "gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_30" + op: "Identity" + input: "clip_by_global_norm/mul_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_32" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_31" + op: "Identity" + input: "clip_by_global_norm/mul_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_33" + op: "Mul" + input: "gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_32" + op: "Identity" + input: "clip_by_global_norm/mul_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_34" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_33" + op: "Identity" + input: "clip_by_global_norm/mul_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_35" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_34" + op: "Identity" + input: "clip_by_global_norm/mul_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_36" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_35" + op: "Identity" + input: "clip_by_global_norm/mul_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_37" + op: "Mul" + input: "gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_36" + op: "Identity" + input: "clip_by_global_norm/mul_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_1/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_38" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_37" + op: "Identity" + input: "clip_by_global_norm/mul_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_39" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_38" + op: "Identity" + input: "clip_by_global_norm/mul_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_40" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_39" + op: "Identity" + input: "clip_by_global_norm/mul_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_41" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_40" + op: "Identity" + input: "clip_by_global_norm/mul_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_42" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_41" + op: "Identity" + input: "clip_by_global_norm/mul_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_43" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_42" + op: "Identity" + input: "clip_by_global_norm/mul_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_44" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_43" + op: "Identity" + input: "clip_by_global_norm/mul_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_45" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_44" + op: "Identity" + input: "clip_by_global_norm/mul_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_46" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_45" + op: "Identity" + input: "clip_by_global_norm/mul_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_47" + op: "Mul" + input: "gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_46" + op: "Identity" + input: "clip_by_global_norm/mul_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_48" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_47" + op: "Identity" + input: "clip_by_global_norm/mul_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_49" + op: "Mul" + input: "gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_48" + op: "Identity" + input: "clip_by_global_norm/mul_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_50" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_49" + op: "Identity" + input: "clip_by_global_norm/mul_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_51" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_50" + op: "Identity" + input: "clip_by_global_norm/mul_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_52" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_51" + op: "Identity" + input: "clip_by_global_norm/mul_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_53" + op: "Mul" + input: "gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_52" + op: "Identity" + input: "clip_by_global_norm/mul_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_2/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_54" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_53" + op: "Identity" + input: "clip_by_global_norm/mul_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_55" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_54" + op: "Identity" + input: "clip_by_global_norm/mul_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_56" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_55" + op: "Identity" + input: "clip_by_global_norm/mul_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_57" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_56" + op: "Identity" + input: "clip_by_global_norm/mul_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_58" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_57" + op: "Identity" + input: "clip_by_global_norm/mul_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_59" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_58" + op: "Identity" + input: "clip_by_global_norm/mul_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_60" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_59" + op: "Identity" + input: "clip_by_global_norm/mul_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_61" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_60" + op: "Identity" + input: "clip_by_global_norm/mul_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_62" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_61" + op: "Identity" + input: "clip_by_global_norm/mul_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_63" + op: "Mul" + input: "gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_62" + op: "Identity" + input: "clip_by_global_norm/mul_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_64" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_63" + op: "Identity" + input: "clip_by_global_norm/mul_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_65" + op: "Mul" + input: "gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_64" + op: "Identity" + input: "clip_by_global_norm/mul_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_66" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_65" + op: "Identity" + input: "clip_by_global_norm/mul_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_67" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_66" + op: "Identity" + input: "clip_by_global_norm/mul_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_68" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_67" + op: "Identity" + input: "clip_by_global_norm/mul_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_69" + op: "Mul" + input: "gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_68" + op: "Identity" + input: "clip_by_global_norm/mul_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_3/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_70" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_69" + op: "Identity" + input: "clip_by_global_norm/mul_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_71" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_70" + op: "Identity" + input: "clip_by_global_norm/mul_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_72" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_71" + op: "Identity" + input: "clip_by_global_norm/mul_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_73" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_72" + op: "Identity" + input: "clip_by_global_norm/mul_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_74" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_73" + op: "Identity" + input: "clip_by_global_norm/mul_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_75" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_74" + op: "Identity" + input: "clip_by_global_norm/mul_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_76" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_75" + op: "Identity" + input: "clip_by_global_norm/mul_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_77" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_76" + op: "Identity" + input: "clip_by_global_norm/mul_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_78" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_77" + op: "Identity" + input: "clip_by_global_norm/mul_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_79" + op: "Mul" + input: "gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_78" + op: "Identity" + input: "clip_by_global_norm/mul_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_80" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_79" + op: "Identity" + input: "clip_by_global_norm/mul_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_81" + op: "Mul" + input: "gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_80" + op: "Identity" + input: "clip_by_global_norm/mul_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_82" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_81" + op: "Identity" + input: "clip_by_global_norm/mul_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_83" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_82" + op: "Identity" + input: "clip_by_global_norm/mul_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_84" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_83" + op: "Identity" + input: "clip_by_global_norm/mul_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_85" + op: "Mul" + input: "gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_84" + op: "Identity" + input: "clip_by_global_norm/mul_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_4/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_86" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_85" + op: "Identity" + input: "clip_by_global_norm/mul_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_87" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_86" + op: "Identity" + input: "clip_by_global_norm/mul_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_88" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_87" + op: "Identity" + input: "clip_by_global_norm/mul_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_89" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_88" + op: "Identity" + input: "clip_by_global_norm/mul_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_90" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_89" + op: "Identity" + input: "clip_by_global_norm/mul_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_91" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_90" + op: "Identity" + input: "clip_by_global_norm/mul_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_92" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_91" + op: "Identity" + input: "clip_by_global_norm/mul_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_93" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_92" + op: "Identity" + input: "clip_by_global_norm/mul_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_94" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_93" + op: "Identity" + input: "clip_by_global_norm/mul_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_95" + op: "Mul" + input: "gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_94" + op: "Identity" + input: "clip_by_global_norm/mul_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_96" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_95" + op: "Identity" + input: "clip_by_global_norm/mul_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_97" + op: "Mul" + input: "gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_96" + op: "Identity" + input: "clip_by_global_norm/mul_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_98" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_97" + op: "Identity" + input: "clip_by_global_norm/mul_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_99" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_98" + op: "Identity" + input: "clip_by_global_norm/mul_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_100" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_99" + op: "Identity" + input: "clip_by_global_norm/mul_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_101" + op: "Mul" + input: "gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_100" + op: "Identity" + input: "clip_by_global_norm/mul_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_5/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_102" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_101" + op: "Identity" + input: "clip_by_global_norm/mul_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_103" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_102" + op: "Identity" + input: "clip_by_global_norm/mul_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_104" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_103" + op: "Identity" + input: "clip_by_global_norm/mul_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_105" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_104" + op: "Identity" + input: "clip_by_global_norm/mul_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_106" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_105" + op: "Identity" + input: "clip_by_global_norm/mul_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_107" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_106" + op: "Identity" + input: "clip_by_global_norm/mul_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_108" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_107" + op: "Identity" + input: "clip_by_global_norm/mul_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_109" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_108" + op: "Identity" + input: "clip_by_global_norm/mul_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_110" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_109" + op: "Identity" + input: "clip_by_global_norm/mul_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_111" + op: "Mul" + input: "gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_110" + op: "Identity" + input: "clip_by_global_norm/mul_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_112" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_111" + op: "Identity" + input: "clip_by_global_norm/mul_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_113" + op: "Mul" + input: "gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_112" + op: "Identity" + input: "clip_by_global_norm/mul_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_114" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_113" + op: "Identity" + input: "clip_by_global_norm/mul_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_115" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_114" + op: "Identity" + input: "clip_by_global_norm/mul_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_116" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_115" + op: "Identity" + input: "clip_by_global_norm/mul_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_117" + op: "Mul" + input: "gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_116" + op: "Identity" + input: "clip_by_global_norm/mul_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_6/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_118" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_117" + op: "Identity" + input: "clip_by_global_norm/mul_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_119" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_118" + op: "Identity" + input: "clip_by_global_norm/mul_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_120" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_119" + op: "Identity" + input: "clip_by_global_norm/mul_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_121" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_120" + op: "Identity" + input: "clip_by_global_norm/mul_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_122" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_121" + op: "Identity" + input: "clip_by_global_norm/mul_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_123" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_122" + op: "Identity" + input: "clip_by_global_norm/mul_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_124" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_123" + op: "Identity" + input: "clip_by_global_norm/mul_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_125" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_124" + op: "Identity" + input: "clip_by_global_norm/mul_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_126" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_125" + op: "Identity" + input: "clip_by_global_norm/mul_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_127" + op: "Mul" + input: "gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_126" + op: "Identity" + input: "clip_by_global_norm/mul_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_128" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_127" + op: "Identity" + input: "clip_by_global_norm/mul_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_129" + op: "Mul" + input: "gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_128" + op: "Identity" + input: "clip_by_global_norm/mul_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_130" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_129" + op: "Identity" + input: "clip_by_global_norm/mul_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_131" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_130" + op: "Identity" + input: "clip_by_global_norm/mul_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_132" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_131" + op: "Identity" + input: "clip_by_global_norm/mul_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_133" + op: "Mul" + input: "gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_132" + op: "Identity" + input: "clip_by_global_norm/mul_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_7/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_134" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_133" + op: "Identity" + input: "clip_by_global_norm/mul_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_135" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_134" + op: "Identity" + input: "clip_by_global_norm/mul_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_136" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_135" + op: "Identity" + input: "clip_by_global_norm/mul_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_137" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_136" + op: "Identity" + input: "clip_by_global_norm/mul_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_138" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_137" + op: "Identity" + input: "clip_by_global_norm/mul_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_139" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_138" + op: "Identity" + input: "clip_by_global_norm/mul_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_140" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_139" + op: "Identity" + input: "clip_by_global_norm/mul_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_141" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_140" + op: "Identity" + input: "clip_by_global_norm/mul_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_142" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_141" + op: "Identity" + input: "clip_by_global_norm/mul_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_143" + op: "Mul" + input: "gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_142" + op: "Identity" + input: "clip_by_global_norm/mul_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_144" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_143" + op: "Identity" + input: "clip_by_global_norm/mul_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_145" + op: "Mul" + input: "gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_144" + op: "Identity" + input: "clip_by_global_norm/mul_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_146" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_145" + op: "Identity" + input: "clip_by_global_norm/mul_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_147" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_146" + op: "Identity" + input: "clip_by_global_norm/mul_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_148" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_147" + op: "Identity" + input: "clip_by_global_norm/mul_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_149" + op: "Mul" + input: "gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_148" + op: "Identity" + input: "clip_by_global_norm/mul_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_8/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_150" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_149" + op: "Identity" + input: "clip_by_global_norm/mul_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_151" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_150" + op: "Identity" + input: "clip_by_global_norm/mul_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_152" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_151" + op: "Identity" + input: "clip_by_global_norm/mul_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_153" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_152" + op: "Identity" + input: "clip_by_global_norm/mul_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_154" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_153" + op: "Identity" + input: "clip_by_global_norm/mul_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_155" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_154" + op: "Identity" + input: "clip_by_global_norm/mul_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_156" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_155" + op: "Identity" + input: "clip_by_global_norm/mul_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_157" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_156" + op: "Identity" + input: "clip_by_global_norm/mul_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_158" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_157" + op: "Identity" + input: "clip_by_global_norm/mul_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_159" + op: "Mul" + input: "gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_158" + op: "Identity" + input: "clip_by_global_norm/mul_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_160" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_159" + op: "Identity" + input: "clip_by_global_norm/mul_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_161" + op: "Mul" + input: "gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_160" + op: "Identity" + input: "clip_by_global_norm/mul_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_162" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_161" + op: "Identity" + input: "clip_by_global_norm/mul_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_163" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_162" + op: "Identity" + input: "clip_by_global_norm/mul_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_164" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_163" + op: "Identity" + input: "clip_by_global_norm/mul_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_165" + op: "Mul" + input: "gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_164" + op: "Identity" + input: "clip_by_global_norm/mul_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_9/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_166" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_165" + op: "Identity" + input: "clip_by_global_norm/mul_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_167" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_166" + op: "Identity" + input: "clip_by_global_norm/mul_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_168" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_167" + op: "Identity" + input: "clip_by_global_norm/mul_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_169" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_168" + op: "Identity" + input: "clip_by_global_norm/mul_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_170" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_169" + op: "Identity" + input: "clip_by_global_norm/mul_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_171" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_170" + op: "Identity" + input: "clip_by_global_norm/mul_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_172" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_171" + op: "Identity" + input: "clip_by_global_norm/mul_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_173" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_172" + op: "Identity" + input: "clip_by_global_norm/mul_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_174" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_173" + op: "Identity" + input: "clip_by_global_norm/mul_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_175" + op: "Mul" + input: "gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_174" + op: "Identity" + input: "clip_by_global_norm/mul_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_176" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_175" + op: "Identity" + input: "clip_by_global_norm/mul_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_177" + op: "Mul" + input: "gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_176" + op: "Identity" + input: "clip_by_global_norm/mul_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_178" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_177" + op: "Identity" + input: "clip_by_global_norm/mul_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_179" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_178" + op: "Identity" + input: "clip_by_global_norm/mul_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_180" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_179" + op: "Identity" + input: "clip_by_global_norm/mul_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_181" + op: "Mul" + input: "gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_180" + op: "Identity" + input: "clip_by_global_norm/mul_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_10/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_182" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_181" + op: "Identity" + input: "clip_by_global_norm/mul_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_183" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_182" + op: "Identity" + input: "clip_by_global_norm/mul_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/query/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_184" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_183" + op: "Identity" + input: "clip_by_global_norm/mul_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_185" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_184" + op: "Identity" + input: "clip_by_global_norm/mul_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/key/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_186" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_185" + op: "Identity" + input: "clip_by_global_norm/mul_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_187" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_186" + op: "Identity" + input: "clip_by_global_norm/mul_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/self/value/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_188" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_187" + op: "Identity" + input: "clip_by_global_norm/mul_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_189" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_188" + op: "Identity" + input: "clip_by_global_norm/mul_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_190" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_189" + op: "Identity" + input: "clip_by_global_norm/mul_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_191" + op: "Mul" + input: "gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_190" + op: "Identity" + input: "clip_by_global_norm/mul_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/attention/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_192" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_191" + op: "Identity" + input: "clip_by_global_norm/mul_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_193" + op: "Mul" + input: "gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_192" + op: "Identity" + input: "clip_by_global_norm/mul_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/intermediate/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_194" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_193" + op: "Identity" + input: "clip_by_global_norm/mul_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_195" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_194" + op: "Identity" + input: "clip_by_global_norm/mul_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_196" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_195" + op: "Identity" + input: "clip_by_global_norm/mul_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_197" + op: "Mul" + input: "gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_196" + op: "Identity" + input: "clip_by_global_norm/mul_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/layer_11/output/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_198" + op: "Mul" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_197" + op: "Identity" + input: "clip_by_global_norm/mul_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_199" + op: "Mul" + input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_198" + op: "Identity" + input: "clip_by_global_norm/mul_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_200" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_199" + op: "Identity" + input: "clip_by_global_norm/mul_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_201" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_200" + op: "Identity" + input: "clip_by_global_norm/mul_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_202" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_201" + op: "Identity" + input: "clip_by_global_norm/mul_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_203" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_202" + op: "Identity" + input: "clip_by_global_norm/mul_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_204" + op: "Mul" + input: "gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_203" + op: "Identity" + input: "clip_by_global_norm/mul_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_205" + op: "Mul" + input: "gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_204" + op: "Identity" + input: "clip_by_global_norm/mul_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_206" + op: "Mul" + input: "gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_205" + op: "Identity" + input: "clip_by_global_norm/mul_206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "Dq\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "Dq\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_4/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_4" + op: "Mul" + input: "Mul_4/x" + input: "bert/embeddings/word_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_5/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_5/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "Mul_5/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Mul_5/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Mul_5/strided_slice" + op: "StridedSlice" + input: "gradients/Shape_1" + input: "Mul_5/strided_slice/stack" + input: "Mul_5/strided_slice/stack_1" + input: "Mul_5/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "Mul_5/y" + op: "UnsortedSegmentSum" + input: "clip_by_global_norm/clip_by_global_norm/_0" + input: "gradients/concat_1" + input: "Mul_5/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_5" + op: "Mul" + input: "Mul_5/x" + input: "Mul_5/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_3" + op: "Add" + input: "Mul_4" + input: "Mul_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_6/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_6" + op: "Mul" + input: "Mul_6/x" + input: "bert/embeddings/word_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "Square/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Square/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Square/strided_slice" + op: "StridedSlice" + input: "gradients/Shape_1" + input: "Square/strided_slice/stack" + input: "Square/strided_slice/stack_1" + input: "Square/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "Square/x" + op: "UnsortedSegmentSum" + input: "clip_by_global_norm/clip_by_global_norm/_0" + input: "gradients/concat_1" + input: "Square/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square" + op: "Square" + input: "Square/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_7/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_7" + op: "Mul" + input: "Mul_7/x" + input: "Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_4" + op: "Add" + input: "Mul_6" + input: "Mul_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt" + op: "Sqrt" + input: "add_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_5/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_5" + op: "Add" + input: "Sqrt" + input: "add_5/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_1" + op: "RealDiv" + input: "add_3" + input: "add_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_8/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_8" + op: "Mul" + input: "mul_8/x" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_6" + op: "Add" + input: "truediv_1" + input: "mul_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_9" + op: "Mul" + input: "add_2" + input: "add_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_1" + op: "Sub" + input: "bert/embeddings/word_embeddings/read" + input: "mul_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_206" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "sub_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_207" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "add_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_208" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "add_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_10/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_10" + op: "Mul" + input: "Mul_10/x" + input: "bert/embeddings/token_type_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_11/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_11" + op: "Mul" + input: "Mul_11/x" + input: "clip_by_global_norm/clip_by_global_norm/_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_7" + op: "Add" + input: "Mul_10" + input: "Mul_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_12/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_12" + op: "Mul" + input: "Mul_12/x" + input: "bert/embeddings/token_type_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_1" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_13/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_13" + op: "Mul" + input: "Mul_13/x" + input: "Square_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_8" + op: "Add" + input: "Mul_12" + input: "Mul_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_1" + op: "Sqrt" + input: "add_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_9/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_9" + op: "Add" + input: "Sqrt_1" + input: "add_9/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_2" + op: "RealDiv" + input: "add_7" + input: "add_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_14/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_14" + op: "Mul" + input: "mul_14/x" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_10" + op: "Add" + input: "truediv_2" + input: "mul_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_15" + op: "Mul" + input: "add_2" + input: "add_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_2" + op: "Sub" + input: "bert/embeddings/token_type_embeddings/read" + input: "mul_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_209" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "sub_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_210" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "add_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_211" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "add_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_16/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_16" + op: "Mul" + input: "Mul_16/x" + input: "bert/embeddings/position_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_17/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_17" + op: "Mul" + input: "Mul_17/x" + input: "clip_by_global_norm/clip_by_global_norm/_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_11" + op: "Add" + input: "Mul_16" + input: "Mul_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_18/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_18" + op: "Mul" + input: "Mul_18/x" + input: "bert/embeddings/position_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_2" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_19/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_19" + op: "Mul" + input: "Mul_19/x" + input: "Square_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_12" + op: "Add" + input: "Mul_18" + input: "Mul_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_2" + op: "Sqrt" + input: "add_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_13/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_13" + op: "Add" + input: "Sqrt_2" + input: "add_13/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_3" + op: "RealDiv" + input: "add_11" + input: "add_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_20/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_20" + op: "Mul" + input: "mul_20/x" + input: "bert/embeddings/position_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_14" + op: "Add" + input: "truediv_3" + input: "mul_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_21" + op: "Mul" + input: "add_2" + input: "add_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_3" + op: "Sub" + input: "bert/embeddings/position_embeddings/read" + input: "mul_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_212" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "sub_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_213" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "add_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_214" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "add_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_22/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_22" + op: "Mul" + input: "Mul_22/x" + input: "bert/embeddings/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_23/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_23" + op: "Mul" + input: "Mul_23/x" + input: "clip_by_global_norm/clip_by_global_norm/_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_15" + op: "Add" + input: "Mul_22" + input: "Mul_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_24/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_24" + op: "Mul" + input: "Mul_24/x" + input: "bert/embeddings/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_3" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_25/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_25" + op: "Mul" + input: "Mul_25/x" + input: "Square_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_16" + op: "Add" + input: "Mul_24" + input: "Mul_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_3" + op: "Sqrt" + input: "add_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_17/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_17" + op: "Add" + input: "Sqrt_3" + input: "add_17/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_4" + op: "RealDiv" + input: "add_15" + input: "add_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_26" + op: "Mul" + input: "add_2" + input: "truediv_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_4" + op: "Sub" + input: "bert/embeddings/LayerNorm/beta/read" + input: "mul_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_215" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "sub_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_216" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "add_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_217" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "add_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_27/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_27" + op: "Mul" + input: "Mul_27/x" + input: "bert/embeddings/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_28/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_28" + op: "Mul" + input: "Mul_28/x" + input: "clip_by_global_norm/clip_by_global_norm/_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_18" + op: "Add" + input: "Mul_27" + input: "Mul_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_29/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_29" + op: "Mul" + input: "Mul_29/x" + input: "bert/embeddings/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_4" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_30/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_30" + op: "Mul" + input: "Mul_30/x" + input: "Square_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_19" + op: "Add" + input: "Mul_29" + input: "Mul_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_4" + op: "Sqrt" + input: "add_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_20/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_20" + op: "Add" + input: "Sqrt_4" + input: "add_20/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_5" + op: "RealDiv" + input: "add_18" + input: "add_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_31" + op: "Mul" + input: "add_2" + input: "truediv_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_5" + op: "Sub" + input: "bert/embeddings/LayerNorm/gamma/read" + input: "mul_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_218" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "sub_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_219" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "add_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_220" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "add_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_32/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_32" + op: "Mul" + input: "Mul_32/x" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_33/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_33" + op: "Mul" + input: "Mul_33/x" + input: "clip_by_global_norm/clip_by_global_norm/_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_21" + op: "Add" + input: "Mul_32" + input: "Mul_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_34/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_34" + op: "Mul" + input: "Mul_34/x" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_5" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_35/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_35" + op: "Mul" + input: "Mul_35/x" + input: "Square_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_22" + op: "Add" + input: "Mul_34" + input: "Mul_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_5" + op: "Sqrt" + input: "add_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_23/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_23" + op: "Add" + input: "Sqrt_5" + input: "add_23/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_6" + op: "RealDiv" + input: "add_21" + input: "add_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_36/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_36" + op: "Mul" + input: "mul_36/x" + input: "bert/encoder/layer_0/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_24" + op: "Add" + input: "truediv_6" + input: "mul_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_37" + op: "Mul" + input: "add_2" + input: "add_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_6" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/query/kernel/read" + input: "mul_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_221" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "sub_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_222" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "add_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_223" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "add_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_38/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_38" + op: "Mul" + input: "Mul_38/x" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_39/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_39" + op: "Mul" + input: "Mul_39/x" + input: "clip_by_global_norm/clip_by_global_norm/_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_25" + op: "Add" + input: "Mul_38" + input: "Mul_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_40/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_40" + op: "Mul" + input: "Mul_40/x" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_6" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_41/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_41" + op: "Mul" + input: "Mul_41/x" + input: "Square_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_26" + op: "Add" + input: "Mul_40" + input: "Mul_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_6" + op: "Sqrt" + input: "add_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_27/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_27" + op: "Add" + input: "Sqrt_6" + input: "add_27/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_7" + op: "RealDiv" + input: "add_25" + input: "add_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_42" + op: "Mul" + input: "add_2" + input: "truediv_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_7" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/query/bias/read" + input: "mul_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_224" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "sub_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_225" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "add_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_226" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "add_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_43/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_43" + op: "Mul" + input: "Mul_43/x" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_44/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_44" + op: "Mul" + input: "Mul_44/x" + input: "clip_by_global_norm/clip_by_global_norm/_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_28" + op: "Add" + input: "Mul_43" + input: "Mul_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_45/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_45" + op: "Mul" + input: "Mul_45/x" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_7" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_46/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_46" + op: "Mul" + input: "Mul_46/x" + input: "Square_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_29" + op: "Add" + input: "Mul_45" + input: "Mul_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_7" + op: "Sqrt" + input: "add_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_30/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_30" + op: "Add" + input: "Sqrt_7" + input: "add_30/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_8" + op: "RealDiv" + input: "add_28" + input: "add_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_47/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_47" + op: "Mul" + input: "mul_47/x" + input: "bert/encoder/layer_0/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_31" + op: "Add" + input: "truediv_8" + input: "mul_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_48" + op: "Mul" + input: "add_2" + input: "add_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_8" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/key/kernel/read" + input: "mul_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_227" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "sub_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_228" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "add_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_229" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "add_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_49/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_49" + op: "Mul" + input: "Mul_49/x" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_50/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_50" + op: "Mul" + input: "Mul_50/x" + input: "clip_by_global_norm/clip_by_global_norm/_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_32" + op: "Add" + input: "Mul_49" + input: "Mul_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_51/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_51" + op: "Mul" + input: "Mul_51/x" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_8" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_52/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_52" + op: "Mul" + input: "Mul_52/x" + input: "Square_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_33" + op: "Add" + input: "Mul_51" + input: "Mul_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_8" + op: "Sqrt" + input: "add_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_34/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_34" + op: "Add" + input: "Sqrt_8" + input: "add_34/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_9" + op: "RealDiv" + input: "add_32" + input: "add_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_53" + op: "Mul" + input: "add_2" + input: "truediv_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_9" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/key/bias/read" + input: "mul_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_230" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "sub_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_231" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "add_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_232" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "add_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_54/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_54" + op: "Mul" + input: "Mul_54/x" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_55/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_55" + op: "Mul" + input: "Mul_55/x" + input: "clip_by_global_norm/clip_by_global_norm/_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_35" + op: "Add" + input: "Mul_54" + input: "Mul_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_56/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_56" + op: "Mul" + input: "Mul_56/x" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_9" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_57/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_57" + op: "Mul" + input: "Mul_57/x" + input: "Square_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_36" + op: "Add" + input: "Mul_56" + input: "Mul_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_9" + op: "Sqrt" + input: "add_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_37/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_37" + op: "Add" + input: "Sqrt_9" + input: "add_37/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_10" + op: "RealDiv" + input: "add_35" + input: "add_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_58/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_58" + op: "Mul" + input: "mul_58/x" + input: "bert/encoder/layer_0/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_38" + op: "Add" + input: "truediv_10" + input: "mul_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_59" + op: "Mul" + input: "add_2" + input: "add_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_10" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/value/kernel/read" + input: "mul_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_233" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "sub_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_234" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "add_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_235" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "add_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_60/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_60" + op: "Mul" + input: "Mul_60/x" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_61/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_61" + op: "Mul" + input: "Mul_61/x" + input: "clip_by_global_norm/clip_by_global_norm/_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_39" + op: "Add" + input: "Mul_60" + input: "Mul_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_62/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_62" + op: "Mul" + input: "Mul_62/x" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_10" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_63/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_63" + op: "Mul" + input: "Mul_63/x" + input: "Square_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_40" + op: "Add" + input: "Mul_62" + input: "Mul_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_10" + op: "Sqrt" + input: "add_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_41/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_41" + op: "Add" + input: "Sqrt_10" + input: "add_41/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_11" + op: "RealDiv" + input: "add_39" + input: "add_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_64" + op: "Mul" + input: "add_2" + input: "truediv_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_11" + op: "Sub" + input: "bert/encoder/layer_0/attention/self/value/bias/read" + input: "mul_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_236" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "sub_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_237" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "add_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_238" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "add_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_65/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_65" + op: "Mul" + input: "Mul_65/x" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_66/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_66" + op: "Mul" + input: "Mul_66/x" + input: "clip_by_global_norm/clip_by_global_norm/_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_42" + op: "Add" + input: "Mul_65" + input: "Mul_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_67/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_67" + op: "Mul" + input: "Mul_67/x" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_11" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_68/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_68" + op: "Mul" + input: "Mul_68/x" + input: "Square_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_43" + op: "Add" + input: "Mul_67" + input: "Mul_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_11" + op: "Sqrt" + input: "add_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_44/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_44" + op: "Add" + input: "Sqrt_11" + input: "add_44/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_12" + op: "RealDiv" + input: "add_42" + input: "add_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_69/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_69" + op: "Mul" + input: "mul_69/x" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_45" + op: "Add" + input: "truediv_12" + input: "mul_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_70" + op: "Mul" + input: "add_2" + input: "add_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_12" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/dense/kernel/read" + input: "mul_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_239" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "sub_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_240" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "add_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_241" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + input: "add_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_71/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_71" + op: "Mul" + input: "Mul_71/x" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_72/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_72" + op: "Mul" + input: "Mul_72/x" + input: "clip_by_global_norm/clip_by_global_norm/_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_46" + op: "Add" + input: "Mul_71" + input: "Mul_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_73/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_73" + op: "Mul" + input: "Mul_73/x" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_12" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_74/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_74" + op: "Mul" + input: "Mul_74/x" + input: "Square_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_47" + op: "Add" + input: "Mul_73" + input: "Mul_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_12" + op: "Sqrt" + input: "add_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_48/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_48" + op: "Add" + input: "Sqrt_12" + input: "add_48/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_13" + op: "RealDiv" + input: "add_46" + input: "add_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_75" + op: "Mul" + input: "add_2" + input: "truediv_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_13" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/dense/bias/read" + input: "mul_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_242" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "sub_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_243" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "add_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_244" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "add_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_76/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_76" + op: "Mul" + input: "Mul_76/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_77/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_77" + op: "Mul" + input: "Mul_77/x" + input: "clip_by_global_norm/clip_by_global_norm/_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_49" + op: "Add" + input: "Mul_76" + input: "Mul_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_78/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_78" + op: "Mul" + input: "Mul_78/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_13" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_79/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_79" + op: "Mul" + input: "Mul_79/x" + input: "Square_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_50" + op: "Add" + input: "Mul_78" + input: "Mul_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_13" + op: "Sqrt" + input: "add_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_51/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_51" + op: "Add" + input: "Sqrt_13" + input: "add_51/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_14" + op: "RealDiv" + input: "add_49" + input: "add_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_80" + op: "Mul" + input: "add_2" + input: "truediv_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_14" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/read" + input: "mul_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_245" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "sub_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_246" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "add_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_247" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "add_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_81/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_81" + op: "Mul" + input: "Mul_81/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_82/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_82" + op: "Mul" + input: "Mul_82/x" + input: "clip_by_global_norm/clip_by_global_norm/_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_52" + op: "Add" + input: "Mul_81" + input: "Mul_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_83/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_83" + op: "Mul" + input: "Mul_83/x" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_14" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_84/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_84" + op: "Mul" + input: "Mul_84/x" + input: "Square_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_53" + op: "Add" + input: "Mul_83" + input: "Mul_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_14" + op: "Sqrt" + input: "add_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_54/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_54" + op: "Add" + input: "Sqrt_14" + input: "add_54/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_15" + op: "RealDiv" + input: "add_52" + input: "add_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_85" + op: "Mul" + input: "add_2" + input: "truediv_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_15" + op: "Sub" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/read" + input: "mul_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_248" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "sub_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_249" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "add_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_250" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "add_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_86/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_86" + op: "Mul" + input: "Mul_86/x" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_87/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_87" + op: "Mul" + input: "Mul_87/x" + input: "clip_by_global_norm/clip_by_global_norm/_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_55" + op: "Add" + input: "Mul_86" + input: "Mul_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_88/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_88" + op: "Mul" + input: "Mul_88/x" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_15" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_89/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_89" + op: "Mul" + input: "Mul_89/x" + input: "Square_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_56" + op: "Add" + input: "Mul_88" + input: "Mul_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_15" + op: "Sqrt" + input: "add_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_57/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_57" + op: "Add" + input: "Sqrt_15" + input: "add_57/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_16" + op: "RealDiv" + input: "add_55" + input: "add_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_90/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_90" + op: "Mul" + input: "mul_90/x" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_58" + op: "Add" + input: "truediv_16" + input: "mul_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_91" + op: "Mul" + input: "add_2" + input: "add_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_16" + op: "Sub" + input: "bert/encoder/layer_0/intermediate/dense/kernel/read" + input: "mul_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_251" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "sub_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_252" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "add_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_253" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "add_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_92/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_92" + op: "Mul" + input: "Mul_92/x" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_93/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_93" + op: "Mul" + input: "Mul_93/x" + input: "clip_by_global_norm/clip_by_global_norm/_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_59" + op: "Add" + input: "Mul_92" + input: "Mul_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_94/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_94" + op: "Mul" + input: "Mul_94/x" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_16" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_95/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_95" + op: "Mul" + input: "Mul_95/x" + input: "Square_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_60" + op: "Add" + input: "Mul_94" + input: "Mul_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_16" + op: "Sqrt" + input: "add_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_61/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_61" + op: "Add" + input: "Sqrt_16" + input: "add_61/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_17" + op: "RealDiv" + input: "add_59" + input: "add_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_96" + op: "Mul" + input: "add_2" + input: "truediv_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_17" + op: "Sub" + input: "bert/encoder/layer_0/intermediate/dense/bias/read" + input: "mul_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_254" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "sub_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_255" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "add_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_256" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "add_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_97/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_97" + op: "Mul" + input: "Mul_97/x" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_98/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_98" + op: "Mul" + input: "Mul_98/x" + input: "clip_by_global_norm/clip_by_global_norm/_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_62" + op: "Add" + input: "Mul_97" + input: "Mul_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_99/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_99" + op: "Mul" + input: "Mul_99/x" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_17" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_100/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_100" + op: "Mul" + input: "Mul_100/x" + input: "Square_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_63" + op: "Add" + input: "Mul_99" + input: "Mul_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_17" + op: "Sqrt" + input: "add_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_64/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_64" + op: "Add" + input: "Sqrt_17" + input: "add_64/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_18" + op: "RealDiv" + input: "add_62" + input: "add_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_101/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_101" + op: "Mul" + input: "mul_101/x" + input: "bert/encoder/layer_0/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_65" + op: "Add" + input: "truediv_18" + input: "mul_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_102" + op: "Mul" + input: "add_2" + input: "add_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_18" + op: "Sub" + input: "bert/encoder/layer_0/output/dense/kernel/read" + input: "mul_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_257" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "sub_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_258" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "add_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_259" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "add_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_103/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_103" + op: "Mul" + input: "Mul_103/x" + input: "bert/encoder/layer_0/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_104/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_104" + op: "Mul" + input: "Mul_104/x" + input: "clip_by_global_norm/clip_by_global_norm/_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_66" + op: "Add" + input: "Mul_103" + input: "Mul_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_105/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_105" + op: "Mul" + input: "Mul_105/x" + input: "bert/encoder/layer_0/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_18" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_106/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_106" + op: "Mul" + input: "Mul_106/x" + input: "Square_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_67" + op: "Add" + input: "Mul_105" + input: "Mul_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_18" + op: "Sqrt" + input: "add_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_68/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_68" + op: "Add" + input: "Sqrt_18" + input: "add_68/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_19" + op: "RealDiv" + input: "add_66" + input: "add_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_107" + op: "Mul" + input: "add_2" + input: "truediv_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_19" + op: "Sub" + input: "bert/encoder/layer_0/output/dense/bias/read" + input: "mul_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_260" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "sub_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_261" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "add_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_262" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "add_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_108/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_108" + op: "Mul" + input: "Mul_108/x" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_109/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_109" + op: "Mul" + input: "Mul_109/x" + input: "clip_by_global_norm/clip_by_global_norm/_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_69" + op: "Add" + input: "Mul_108" + input: "Mul_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_110/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_110" + op: "Mul" + input: "Mul_110/x" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_19" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_111/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_111" + op: "Mul" + input: "Mul_111/x" + input: "Square_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_70" + op: "Add" + input: "Mul_110" + input: "Mul_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_19" + op: "Sqrt" + input: "add_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_71/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_71" + op: "Add" + input: "Sqrt_19" + input: "add_71/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_20" + op: "RealDiv" + input: "add_69" + input: "add_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_112" + op: "Mul" + input: "add_2" + input: "truediv_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_20" + op: "Sub" + input: "bert/encoder/layer_0/output/LayerNorm/beta/read" + input: "mul_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_263" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "sub_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_264" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "add_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_265" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "add_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_113/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_113" + op: "Mul" + input: "Mul_113/x" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_114/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_114" + op: "Mul" + input: "Mul_114/x" + input: "clip_by_global_norm/clip_by_global_norm/_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_72" + op: "Add" + input: "Mul_113" + input: "Mul_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_115/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_115" + op: "Mul" + input: "Mul_115/x" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_20" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_116/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_116" + op: "Mul" + input: "Mul_116/x" + input: "Square_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_73" + op: "Add" + input: "Mul_115" + input: "Mul_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_20" + op: "Sqrt" + input: "add_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_74/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_74" + op: "Add" + input: "Sqrt_20" + input: "add_74/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_21" + op: "RealDiv" + input: "add_72" + input: "add_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_117" + op: "Mul" + input: "add_2" + input: "truediv_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_21" + op: "Sub" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/read" + input: "mul_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_266" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "sub_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_267" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "add_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_268" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "add_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_118/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_118" + op: "Mul" + input: "Mul_118/x" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_119/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_119" + op: "Mul" + input: "Mul_119/x" + input: "clip_by_global_norm/clip_by_global_norm/_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_75" + op: "Add" + input: "Mul_118" + input: "Mul_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_120/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_120" + op: "Mul" + input: "Mul_120/x" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_21" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_121/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_121" + op: "Mul" + input: "Mul_121/x" + input: "Square_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_76" + op: "Add" + input: "Mul_120" + input: "Mul_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_21" + op: "Sqrt" + input: "add_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_77/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_77" + op: "Add" + input: "Sqrt_21" + input: "add_77/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_22" + op: "RealDiv" + input: "add_75" + input: "add_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_122/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_122" + op: "Mul" + input: "mul_122/x" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_78" + op: "Add" + input: "truediv_22" + input: "mul_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_123" + op: "Mul" + input: "add_2" + input: "add_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_22" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/query/kernel/read" + input: "mul_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_269" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "sub_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_270" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "add_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_271" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "add_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_124/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_124" + op: "Mul" + input: "Mul_124/x" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_125/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_125" + op: "Mul" + input: "Mul_125/x" + input: "clip_by_global_norm/clip_by_global_norm/_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_79" + op: "Add" + input: "Mul_124" + input: "Mul_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_126/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_126" + op: "Mul" + input: "Mul_126/x" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_22" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_127/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_127" + op: "Mul" + input: "Mul_127/x" + input: "Square_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_80" + op: "Add" + input: "Mul_126" + input: "Mul_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_22" + op: "Sqrt" + input: "add_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_81/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_81" + op: "Add" + input: "Sqrt_22" + input: "add_81/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_23" + op: "RealDiv" + input: "add_79" + input: "add_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_128" + op: "Mul" + input: "add_2" + input: "truediv_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_23" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/query/bias/read" + input: "mul_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_272" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "sub_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_273" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "add_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_274" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "add_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_129/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_129" + op: "Mul" + input: "Mul_129/x" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_130/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_130" + op: "Mul" + input: "Mul_130/x" + input: "clip_by_global_norm/clip_by_global_norm/_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_82" + op: "Add" + input: "Mul_129" + input: "Mul_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_131/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_131" + op: "Mul" + input: "Mul_131/x" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_23" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_132/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_132" + op: "Mul" + input: "Mul_132/x" + input: "Square_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_83" + op: "Add" + input: "Mul_131" + input: "Mul_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_23" + op: "Sqrt" + input: "add_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_84/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_84" + op: "Add" + input: "Sqrt_23" + input: "add_84/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_24" + op: "RealDiv" + input: "add_82" + input: "add_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_133/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_133" + op: "Mul" + input: "mul_133/x" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_85" + op: "Add" + input: "truediv_24" + input: "mul_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_134" + op: "Mul" + input: "add_2" + input: "add_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_24" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/key/kernel/read" + input: "mul_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_275" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "sub_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_276" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "add_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_277" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "add_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_135/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_135" + op: "Mul" + input: "Mul_135/x" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_136/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_136" + op: "Mul" + input: "Mul_136/x" + input: "clip_by_global_norm/clip_by_global_norm/_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_86" + op: "Add" + input: "Mul_135" + input: "Mul_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_137/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_137" + op: "Mul" + input: "Mul_137/x" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_24" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_138/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_138" + op: "Mul" + input: "Mul_138/x" + input: "Square_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_87" + op: "Add" + input: "Mul_137" + input: "Mul_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_24" + op: "Sqrt" + input: "add_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_88/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_88" + op: "Add" + input: "Sqrt_24" + input: "add_88/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_25" + op: "RealDiv" + input: "add_86" + input: "add_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_139" + op: "Mul" + input: "add_2" + input: "truediv_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_25" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/key/bias/read" + input: "mul_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_278" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "sub_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_279" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "add_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_280" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "add_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_140/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_140" + op: "Mul" + input: "Mul_140/x" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_141/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_141" + op: "Mul" + input: "Mul_141/x" + input: "clip_by_global_norm/clip_by_global_norm/_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_89" + op: "Add" + input: "Mul_140" + input: "Mul_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_142/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_142" + op: "Mul" + input: "Mul_142/x" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_25" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_143/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_143" + op: "Mul" + input: "Mul_143/x" + input: "Square_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_90" + op: "Add" + input: "Mul_142" + input: "Mul_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_25" + op: "Sqrt" + input: "add_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_91/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_91" + op: "Add" + input: "Sqrt_25" + input: "add_91/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_26" + op: "RealDiv" + input: "add_89" + input: "add_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_144/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_144" + op: "Mul" + input: "mul_144/x" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_92" + op: "Add" + input: "truediv_26" + input: "mul_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_145" + op: "Mul" + input: "add_2" + input: "add_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_26" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/value/kernel/read" + input: "mul_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_281" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "sub_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_282" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "add_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_283" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "add_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_146/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_146" + op: "Mul" + input: "Mul_146/x" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_147/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_147" + op: "Mul" + input: "Mul_147/x" + input: "clip_by_global_norm/clip_by_global_norm/_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_93" + op: "Add" + input: "Mul_146" + input: "Mul_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_148/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_148" + op: "Mul" + input: "Mul_148/x" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_26" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_149/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_149" + op: "Mul" + input: "Mul_149/x" + input: "Square_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_94" + op: "Add" + input: "Mul_148" + input: "Mul_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_26" + op: "Sqrt" + input: "add_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_95/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_95" + op: "Add" + input: "Sqrt_26" + input: "add_95/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_27" + op: "RealDiv" + input: "add_93" + input: "add_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_150" + op: "Mul" + input: "add_2" + input: "truediv_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_27" + op: "Sub" + input: "bert/encoder/layer_1/attention/self/value/bias/read" + input: "mul_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_284" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "sub_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_285" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "add_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_286" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "add_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_151/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_151" + op: "Mul" + input: "Mul_151/x" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_152/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_152" + op: "Mul" + input: "Mul_152/x" + input: "clip_by_global_norm/clip_by_global_norm/_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_96" + op: "Add" + input: "Mul_151" + input: "Mul_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_153/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_153" + op: "Mul" + input: "Mul_153/x" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_27" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_154/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_154" + op: "Mul" + input: "Mul_154/x" + input: "Square_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_97" + op: "Add" + input: "Mul_153" + input: "Mul_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_27" + op: "Sqrt" + input: "add_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_98/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_98" + op: "Add" + input: "Sqrt_27" + input: "add_98/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_28" + op: "RealDiv" + input: "add_96" + input: "add_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_155/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_155" + op: "Mul" + input: "mul_155/x" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_99" + op: "Add" + input: "truediv_28" + input: "mul_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_156" + op: "Mul" + input: "add_2" + input: "add_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_28" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/dense/kernel/read" + input: "mul_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_287" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "sub_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_288" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "add_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_289" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "add_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_157/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_157" + op: "Mul" + input: "Mul_157/x" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_158/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_158" + op: "Mul" + input: "Mul_158/x" + input: "clip_by_global_norm/clip_by_global_norm/_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_100" + op: "Add" + input: "Mul_157" + input: "Mul_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_159/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_159" + op: "Mul" + input: "Mul_159/x" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_28" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_160/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_160" + op: "Mul" + input: "Mul_160/x" + input: "Square_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_101" + op: "Add" + input: "Mul_159" + input: "Mul_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_28" + op: "Sqrt" + input: "add_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_102/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_102" + op: "Add" + input: "Sqrt_28" + input: "add_102/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_29" + op: "RealDiv" + input: "add_100" + input: "add_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_161" + op: "Mul" + input: "add_2" + input: "truediv_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_29" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/dense/bias/read" + input: "mul_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_290" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "sub_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_291" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "add_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_292" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: "add_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_162/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_162" + op: "Mul" + input: "Mul_162/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_163/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_163" + op: "Mul" + input: "Mul_163/x" + input: "clip_by_global_norm/clip_by_global_norm/_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_103" + op: "Add" + input: "Mul_162" + input: "Mul_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_164/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_164" + op: "Mul" + input: "Mul_164/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_29" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_165/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_165" + op: "Mul" + input: "Mul_165/x" + input: "Square_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_104" + op: "Add" + input: "Mul_164" + input: "Mul_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_29" + op: "Sqrt" + input: "add_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_105/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_105" + op: "Add" + input: "Sqrt_29" + input: "add_105/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_30" + op: "RealDiv" + input: "add_103" + input: "add_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_166" + op: "Mul" + input: "add_2" + input: "truediv_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_30" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/read" + input: "mul_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_293" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "sub_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_294" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "add_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_295" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "add_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_167/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_167" + op: "Mul" + input: "Mul_167/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_168/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_168" + op: "Mul" + input: "Mul_168/x" + input: "clip_by_global_norm/clip_by_global_norm/_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_106" + op: "Add" + input: "Mul_167" + input: "Mul_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_169/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_169" + op: "Mul" + input: "Mul_169/x" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_30" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_170/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_170" + op: "Mul" + input: "Mul_170/x" + input: "Square_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_107" + op: "Add" + input: "Mul_169" + input: "Mul_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_30" + op: "Sqrt" + input: "add_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_108/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_108" + op: "Add" + input: "Sqrt_30" + input: "add_108/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_31" + op: "RealDiv" + input: "add_106" + input: "add_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_171" + op: "Mul" + input: "add_2" + input: "truediv_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_31" + op: "Sub" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/read" + input: "mul_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_296" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "sub_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_297" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "add_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_298" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "add_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_172/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_172" + op: "Mul" + input: "Mul_172/x" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_173/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_173" + op: "Mul" + input: "Mul_173/x" + input: "clip_by_global_norm/clip_by_global_norm/_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_109" + op: "Add" + input: "Mul_172" + input: "Mul_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_174/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_174" + op: "Mul" + input: "Mul_174/x" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_31" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_175/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_175" + op: "Mul" + input: "Mul_175/x" + input: "Square_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_110" + op: "Add" + input: "Mul_174" + input: "Mul_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_31" + op: "Sqrt" + input: "add_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_111/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_111" + op: "Add" + input: "Sqrt_31" + input: "add_111/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_32" + op: "RealDiv" + input: "add_109" + input: "add_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_176/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_176" + op: "Mul" + input: "mul_176/x" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_112" + op: "Add" + input: "truediv_32" + input: "mul_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_177" + op: "Mul" + input: "add_2" + input: "add_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_32" + op: "Sub" + input: "bert/encoder/layer_1/intermediate/dense/kernel/read" + input: "mul_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_299" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "sub_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_300" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: "add_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_301" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "add_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_178/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_178" + op: "Mul" + input: "Mul_178/x" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_179/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_179" + op: "Mul" + input: "Mul_179/x" + input: "clip_by_global_norm/clip_by_global_norm/_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_113" + op: "Add" + input: "Mul_178" + input: "Mul_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_180/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_180" + op: "Mul" + input: "Mul_180/x" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_32" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_181/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_181" + op: "Mul" + input: "Mul_181/x" + input: "Square_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_114" + op: "Add" + input: "Mul_180" + input: "Mul_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_32" + op: "Sqrt" + input: "add_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_115/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_115" + op: "Add" + input: "Sqrt_32" + input: "add_115/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_33" + op: "RealDiv" + input: "add_113" + input: "add_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_182" + op: "Mul" + input: "add_2" + input: "truediv_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_33" + op: "Sub" + input: "bert/encoder/layer_1/intermediate/dense/bias/read" + input: "mul_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_302" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "sub_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_303" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "add_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_304" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "add_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_183/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_183" + op: "Mul" + input: "Mul_183/x" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_184/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_184" + op: "Mul" + input: "Mul_184/x" + input: "clip_by_global_norm/clip_by_global_norm/_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_116" + op: "Add" + input: "Mul_183" + input: "Mul_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_185/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_185" + op: "Mul" + input: "Mul_185/x" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_33" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_186/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_186" + op: "Mul" + input: "Mul_186/x" + input: "Square_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_117" + op: "Add" + input: "Mul_185" + input: "Mul_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_33" + op: "Sqrt" + input: "add_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_118/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_118" + op: "Add" + input: "Sqrt_33" + input: "add_118/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_34" + op: "RealDiv" + input: "add_116" + input: "add_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_187/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_187" + op: "Mul" + input: "mul_187/x" + input: "bert/encoder/layer_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_119" + op: "Add" + input: "truediv_34" + input: "mul_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_188" + op: "Mul" + input: "add_2" + input: "add_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_34" + op: "Sub" + input: "bert/encoder/layer_1/output/dense/kernel/read" + input: "mul_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_305" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "sub_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_306" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "add_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_307" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "add_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "bert/encoder/layer_1/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_189/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_189" + op: "Mul" + input: "Mul_189/x" + input: "bert/encoder/layer_1/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_190/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_190" + op: "Mul" + input: "Mul_190/x" + input: "clip_by_global_norm/clip_by_global_norm/_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_120" + op: "Add" + input: "Mul_189" + input: "Mul_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_191/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_191" + op: "Mul" + input: "Mul_191/x" + input: "bert/encoder/layer_1/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_34" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_192/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_192" + op: "Mul" + input: "Mul_192/x" + input: "Square_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_121" + op: "Add" + input: "Mul_191" + input: "Mul_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_34" + op: "Sqrt" + input: "add_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_122/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_122" + op: "Add" + input: "Sqrt_34" + input: "add_122/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_35" + op: "RealDiv" + input: "add_120" + input: "add_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_193" + op: "Mul" + input: "add_2" + input: "truediv_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_35" + op: "Sub" + input: "bert/encoder/layer_1/output/dense/bias/read" + input: "mul_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_308" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "sub_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_309" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "add_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_310" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "add_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_194/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_194" + op: "Mul" + input: "Mul_194/x" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_195/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_195" + op: "Mul" + input: "Mul_195/x" + input: "clip_by_global_norm/clip_by_global_norm/_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_123" + op: "Add" + input: "Mul_194" + input: "Mul_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_196/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_196" + op: "Mul" + input: "Mul_196/x" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_35" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_197/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_197" + op: "Mul" + input: "Mul_197/x" + input: "Square_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_124" + op: "Add" + input: "Mul_196" + input: "Mul_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_35" + op: "Sqrt" + input: "add_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_125/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_125" + op: "Add" + input: "Sqrt_35" + input: "add_125/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_36" + op: "RealDiv" + input: "add_123" + input: "add_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_198" + op: "Mul" + input: "add_2" + input: "truediv_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_36" + op: "Sub" + input: "bert/encoder/layer_1/output/LayerNorm/beta/read" + input: "mul_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_311" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "sub_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_312" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "add_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_313" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "add_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_199/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_199" + op: "Mul" + input: "Mul_199/x" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_200/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_200" + op: "Mul" + input: "Mul_200/x" + input: "clip_by_global_norm/clip_by_global_norm/_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_126" + op: "Add" + input: "Mul_199" + input: "Mul_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_201/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_201" + op: "Mul" + input: "Mul_201/x" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_36" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_202/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_202" + op: "Mul" + input: "Mul_202/x" + input: "Square_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_127" + op: "Add" + input: "Mul_201" + input: "Mul_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_36" + op: "Sqrt" + input: "add_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_128/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_128" + op: "Add" + input: "Sqrt_36" + input: "add_128/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_37" + op: "RealDiv" + input: "add_126" + input: "add_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_203" + op: "Mul" + input: "add_2" + input: "truediv_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_37" + op: "Sub" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/read" + input: "mul_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_314" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "sub_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_315" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "add_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_316" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "add_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_204/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_204" + op: "Mul" + input: "Mul_204/x" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_205/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_205" + op: "Mul" + input: "Mul_205/x" + input: "clip_by_global_norm/clip_by_global_norm/_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_129" + op: "Add" + input: "Mul_204" + input: "Mul_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_206/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_206" + op: "Mul" + input: "Mul_206/x" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_37" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_207/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_207" + op: "Mul" + input: "Mul_207/x" + input: "Square_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_130" + op: "Add" + input: "Mul_206" + input: "Mul_207" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_37" + op: "Sqrt" + input: "add_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_131/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_131" + op: "Add" + input: "Sqrt_37" + input: "add_131/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_38" + op: "RealDiv" + input: "add_129" + input: "add_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_208/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_208" + op: "Mul" + input: "mul_208/x" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_132" + op: "Add" + input: "truediv_38" + input: "mul_208" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_209" + op: "Mul" + input: "add_2" + input: "add_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_38" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/query/kernel/read" + input: "mul_209" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_317" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "sub_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_318" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "add_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_319" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "add_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_210/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_210" + op: "Mul" + input: "Mul_210/x" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_211/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_211" + op: "Mul" + input: "Mul_211/x" + input: "clip_by_global_norm/clip_by_global_norm/_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_133" + op: "Add" + input: "Mul_210" + input: "Mul_211" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_212/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_212" + op: "Mul" + input: "Mul_212/x" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_38" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_213/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_213" + op: "Mul" + input: "Mul_213/x" + input: "Square_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_134" + op: "Add" + input: "Mul_212" + input: "Mul_213" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_38" + op: "Sqrt" + input: "add_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_135/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_135" + op: "Add" + input: "Sqrt_38" + input: "add_135/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_39" + op: "RealDiv" + input: "add_133" + input: "add_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_214" + op: "Mul" + input: "add_2" + input: "truediv_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_39" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/query/bias/read" + input: "mul_214" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_320" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "sub_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_321" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: "add_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_322" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "add_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_215/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_215" + op: "Mul" + input: "Mul_215/x" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_216/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_216" + op: "Mul" + input: "Mul_216/x" + input: "clip_by_global_norm/clip_by_global_norm/_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_136" + op: "Add" + input: "Mul_215" + input: "Mul_216" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_217/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_217" + op: "Mul" + input: "Mul_217/x" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_39" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_218/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_218" + op: "Mul" + input: "Mul_218/x" + input: "Square_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_137" + op: "Add" + input: "Mul_217" + input: "Mul_218" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_39" + op: "Sqrt" + input: "add_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_138/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_138" + op: "Add" + input: "Sqrt_39" + input: "add_138/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_40" + op: "RealDiv" + input: "add_136" + input: "add_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_219/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_219" + op: "Mul" + input: "mul_219/x" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_139" + op: "Add" + input: "truediv_40" + input: "mul_219" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_220" + op: "Mul" + input: "add_2" + input: "add_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_40" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/key/kernel/read" + input: "mul_220" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_323" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "sub_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_324" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "add_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_325" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "add_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_221/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_221" + op: "Mul" + input: "Mul_221/x" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_222/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_222" + op: "Mul" + input: "Mul_222/x" + input: "clip_by_global_norm/clip_by_global_norm/_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_140" + op: "Add" + input: "Mul_221" + input: "Mul_222" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_223/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_223" + op: "Mul" + input: "Mul_223/x" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_40" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_224/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_224" + op: "Mul" + input: "Mul_224/x" + input: "Square_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_141" + op: "Add" + input: "Mul_223" + input: "Mul_224" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_40" + op: "Sqrt" + input: "add_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_142/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_142" + op: "Add" + input: "Sqrt_40" + input: "add_142/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_41" + op: "RealDiv" + input: "add_140" + input: "add_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_225" + op: "Mul" + input: "add_2" + input: "truediv_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_41" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/key/bias/read" + input: "mul_225" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_326" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "sub_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_327" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "add_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_328" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "add_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_226/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_226" + op: "Mul" + input: "Mul_226/x" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_227/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_227" + op: "Mul" + input: "Mul_227/x" + input: "clip_by_global_norm/clip_by_global_norm/_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_143" + op: "Add" + input: "Mul_226" + input: "Mul_227" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_228/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_228" + op: "Mul" + input: "Mul_228/x" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_41" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_229/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_229" + op: "Mul" + input: "Mul_229/x" + input: "Square_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_144" + op: "Add" + input: "Mul_228" + input: "Mul_229" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_41" + op: "Sqrt" + input: "add_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_145/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_145" + op: "Add" + input: "Sqrt_41" + input: "add_145/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_42" + op: "RealDiv" + input: "add_143" + input: "add_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_230/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_230" + op: "Mul" + input: "mul_230/x" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_146" + op: "Add" + input: "truediv_42" + input: "mul_230" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_231" + op: "Mul" + input: "add_2" + input: "add_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_42" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/value/kernel/read" + input: "mul_231" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_329" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "sub_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_330" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "add_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_331" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "add_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_232/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_232" + op: "Mul" + input: "Mul_232/x" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_233/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_233" + op: "Mul" + input: "Mul_233/x" + input: "clip_by_global_norm/clip_by_global_norm/_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_147" + op: "Add" + input: "Mul_232" + input: "Mul_233" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_234/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_234" + op: "Mul" + input: "Mul_234/x" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_42" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_235/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_235" + op: "Mul" + input: "Mul_235/x" + input: "Square_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_148" + op: "Add" + input: "Mul_234" + input: "Mul_235" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_42" + op: "Sqrt" + input: "add_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_149/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_149" + op: "Add" + input: "Sqrt_42" + input: "add_149/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_43" + op: "RealDiv" + input: "add_147" + input: "add_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_236" + op: "Mul" + input: "add_2" + input: "truediv_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_43" + op: "Sub" + input: "bert/encoder/layer_2/attention/self/value/bias/read" + input: "mul_236" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_332" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "sub_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_333" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "add_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_334" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "add_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_237/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_237" + op: "Mul" + input: "Mul_237/x" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_238/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_238" + op: "Mul" + input: "Mul_238/x" + input: "clip_by_global_norm/clip_by_global_norm/_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_150" + op: "Add" + input: "Mul_237" + input: "Mul_238" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_239/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_239" + op: "Mul" + input: "Mul_239/x" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_43" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_240/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_240" + op: "Mul" + input: "Mul_240/x" + input: "Square_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_151" + op: "Add" + input: "Mul_239" + input: "Mul_240" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_43" + op: "Sqrt" + input: "add_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_152/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_152" + op: "Add" + input: "Sqrt_43" + input: "add_152/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_44" + op: "RealDiv" + input: "add_150" + input: "add_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_241/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_241" + op: "Mul" + input: "mul_241/x" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_153" + op: "Add" + input: "truediv_44" + input: "mul_241" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_242" + op: "Mul" + input: "add_2" + input: "add_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_44" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/dense/kernel/read" + input: "mul_242" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_335" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "sub_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_336" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "add_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_337" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "add_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_243/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_243" + op: "Mul" + input: "Mul_243/x" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_244/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_244" + op: "Mul" + input: "Mul_244/x" + input: "clip_by_global_norm/clip_by_global_norm/_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_154" + op: "Add" + input: "Mul_243" + input: "Mul_244" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_245/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_245" + op: "Mul" + input: "Mul_245/x" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_44" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_246/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_246" + op: "Mul" + input: "Mul_246/x" + input: "Square_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_155" + op: "Add" + input: "Mul_245" + input: "Mul_246" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_44" + op: "Sqrt" + input: "add_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_156/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_156" + op: "Add" + input: "Sqrt_44" + input: "add_156/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_45" + op: "RealDiv" + input: "add_154" + input: "add_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_247" + op: "Mul" + input: "add_2" + input: "truediv_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_45" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/dense/bias/read" + input: "mul_247" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_338" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "sub_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_339" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "add_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_340" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "add_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_248/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_248" + op: "Mul" + input: "Mul_248/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_249/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_249" + op: "Mul" + input: "Mul_249/x" + input: "clip_by_global_norm/clip_by_global_norm/_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_157" + op: "Add" + input: "Mul_248" + input: "Mul_249" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_250/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_250" + op: "Mul" + input: "Mul_250/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_45" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_251/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_251" + op: "Mul" + input: "Mul_251/x" + input: "Square_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_158" + op: "Add" + input: "Mul_250" + input: "Mul_251" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_45" + op: "Sqrt" + input: "add_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_159/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_159" + op: "Add" + input: "Sqrt_45" + input: "add_159/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_46" + op: "RealDiv" + input: "add_157" + input: "add_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_252" + op: "Mul" + input: "add_2" + input: "truediv_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_46" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/read" + input: "mul_252" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_341" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "sub_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_342" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "add_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_343" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "add_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_253/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_253" + op: "Mul" + input: "Mul_253/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_254/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_254" + op: "Mul" + input: "Mul_254/x" + input: "clip_by_global_norm/clip_by_global_norm/_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_160" + op: "Add" + input: "Mul_253" + input: "Mul_254" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_255/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_255" + op: "Mul" + input: "Mul_255/x" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_46" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_256/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_256" + op: "Mul" + input: "Mul_256/x" + input: "Square_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_161" + op: "Add" + input: "Mul_255" + input: "Mul_256" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_46" + op: "Sqrt" + input: "add_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_162/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_162" + op: "Add" + input: "Sqrt_46" + input: "add_162/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_47" + op: "RealDiv" + input: "add_160" + input: "add_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_257" + op: "Mul" + input: "add_2" + input: "truediv_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_47" + op: "Sub" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/read" + input: "mul_257" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_344" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "sub_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_345" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "add_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_346" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "add_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_258/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_258" + op: "Mul" + input: "Mul_258/x" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_259/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_259" + op: "Mul" + input: "Mul_259/x" + input: "clip_by_global_norm/clip_by_global_norm/_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_163" + op: "Add" + input: "Mul_258" + input: "Mul_259" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_260/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_260" + op: "Mul" + input: "Mul_260/x" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_47" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_261/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_261" + op: "Mul" + input: "Mul_261/x" + input: "Square_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_164" + op: "Add" + input: "Mul_260" + input: "Mul_261" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_47" + op: "Sqrt" + input: "add_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_165/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_165" + op: "Add" + input: "Sqrt_47" + input: "add_165/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_48" + op: "RealDiv" + input: "add_163" + input: "add_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_262/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_262" + op: "Mul" + input: "mul_262/x" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_166" + op: "Add" + input: "truediv_48" + input: "mul_262" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_263" + op: "Mul" + input: "add_2" + input: "add_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_48" + op: "Sub" + input: "bert/encoder/layer_2/intermediate/dense/kernel/read" + input: "mul_263" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_347" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "sub_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_348" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "add_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_349" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "add_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_264/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_264" + op: "Mul" + input: "Mul_264/x" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_265/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_265" + op: "Mul" + input: "Mul_265/x" + input: "clip_by_global_norm/clip_by_global_norm/_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_167" + op: "Add" + input: "Mul_264" + input: "Mul_265" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_266/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_266" + op: "Mul" + input: "Mul_266/x" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_48" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_267/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_267" + op: "Mul" + input: "Mul_267/x" + input: "Square_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_168" + op: "Add" + input: "Mul_266" + input: "Mul_267" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_48" + op: "Sqrt" + input: "add_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_169/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_169" + op: "Add" + input: "Sqrt_48" + input: "add_169/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_49" + op: "RealDiv" + input: "add_167" + input: "add_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_268" + op: "Mul" + input: "add_2" + input: "truediv_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_49" + op: "Sub" + input: "bert/encoder/layer_2/intermediate/dense/bias/read" + input: "mul_268" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_350" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "sub_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_351" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "add_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_352" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: "add_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_269/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_269" + op: "Mul" + input: "Mul_269/x" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_270/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_270" + op: "Mul" + input: "Mul_270/x" + input: "clip_by_global_norm/clip_by_global_norm/_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_170" + op: "Add" + input: "Mul_269" + input: "Mul_270" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_271/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_271" + op: "Mul" + input: "Mul_271/x" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_49" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_272/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_272" + op: "Mul" + input: "Mul_272/x" + input: "Square_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_171" + op: "Add" + input: "Mul_271" + input: "Mul_272" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_49" + op: "Sqrt" + input: "add_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_172/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_172" + op: "Add" + input: "Sqrt_49" + input: "add_172/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_50" + op: "RealDiv" + input: "add_170" + input: "add_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_273/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_273" + op: "Mul" + input: "mul_273/x" + input: "bert/encoder/layer_2/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_173" + op: "Add" + input: "truediv_50" + input: "mul_273" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_274" + op: "Mul" + input: "add_2" + input: "add_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_50" + op: "Sub" + input: "bert/encoder/layer_2/output/dense/kernel/read" + input: "mul_274" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_353" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "sub_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_354" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "add_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_355" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "add_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_275/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_275" + op: "Mul" + input: "Mul_275/x" + input: "bert/encoder/layer_2/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_276/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_276" + op: "Mul" + input: "Mul_276/x" + input: "clip_by_global_norm/clip_by_global_norm/_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_174" + op: "Add" + input: "Mul_275" + input: "Mul_276" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_277/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_277" + op: "Mul" + input: "Mul_277/x" + input: "bert/encoder/layer_2/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_50" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_278/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_278" + op: "Mul" + input: "Mul_278/x" + input: "Square_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_175" + op: "Add" + input: "Mul_277" + input: "Mul_278" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_50" + op: "Sqrt" + input: "add_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_176/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_176" + op: "Add" + input: "Sqrt_50" + input: "add_176/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_51" + op: "RealDiv" + input: "add_174" + input: "add_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_279" + op: "Mul" + input: "add_2" + input: "truediv_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_51" + op: "Sub" + input: "bert/encoder/layer_2/output/dense/bias/read" + input: "mul_279" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_356" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "sub_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_357" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "add_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_358" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "add_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_280/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_280" + op: "Mul" + input: "Mul_280/x" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_281/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_281" + op: "Mul" + input: "Mul_281/x" + input: "clip_by_global_norm/clip_by_global_norm/_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_177" + op: "Add" + input: "Mul_280" + input: "Mul_281" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_282/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_282" + op: "Mul" + input: "Mul_282/x" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_51" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_283/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_283" + op: "Mul" + input: "Mul_283/x" + input: "Square_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_178" + op: "Add" + input: "Mul_282" + input: "Mul_283" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_51" + op: "Sqrt" + input: "add_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_179/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_179" + op: "Add" + input: "Sqrt_51" + input: "add_179/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_52" + op: "RealDiv" + input: "add_177" + input: "add_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_284" + op: "Mul" + input: "add_2" + input: "truediv_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_52" + op: "Sub" + input: "bert/encoder/layer_2/output/LayerNorm/beta/read" + input: "mul_284" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_359" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "sub_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_360" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "add_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_361" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "add_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_285/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_285" + op: "Mul" + input: "Mul_285/x" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_286/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_286" + op: "Mul" + input: "Mul_286/x" + input: "clip_by_global_norm/clip_by_global_norm/_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_180" + op: "Add" + input: "Mul_285" + input: "Mul_286" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_287/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_287" + op: "Mul" + input: "Mul_287/x" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_52" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_288/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_288" + op: "Mul" + input: "Mul_288/x" + input: "Square_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_181" + op: "Add" + input: "Mul_287" + input: "Mul_288" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_52" + op: "Sqrt" + input: "add_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_182/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_182" + op: "Add" + input: "Sqrt_52" + input: "add_182/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_53" + op: "RealDiv" + input: "add_180" + input: "add_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_289" + op: "Mul" + input: "add_2" + input: "truediv_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_53" + op: "Sub" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/read" + input: "mul_289" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_362" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "sub_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_363" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "add_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_364" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "add_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_290/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_290" + op: "Mul" + input: "Mul_290/x" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_291/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_291" + op: "Mul" + input: "Mul_291/x" + input: "clip_by_global_norm/clip_by_global_norm/_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_183" + op: "Add" + input: "Mul_290" + input: "Mul_291" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_292/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_292" + op: "Mul" + input: "Mul_292/x" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_53" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_293/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_293" + op: "Mul" + input: "Mul_293/x" + input: "Square_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_184" + op: "Add" + input: "Mul_292" + input: "Mul_293" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_53" + op: "Sqrt" + input: "add_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_185/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_185" + op: "Add" + input: "Sqrt_53" + input: "add_185/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_54" + op: "RealDiv" + input: "add_183" + input: "add_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_294/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_294" + op: "Mul" + input: "mul_294/x" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_186" + op: "Add" + input: "truediv_54" + input: "mul_294" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_295" + op: "Mul" + input: "add_2" + input: "add_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_54" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/query/kernel/read" + input: "mul_295" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_365" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "sub_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_366" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "add_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_367" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "add_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_296/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_296" + op: "Mul" + input: "Mul_296/x" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_297/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_297" + op: "Mul" + input: "Mul_297/x" + input: "clip_by_global_norm/clip_by_global_norm/_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_187" + op: "Add" + input: "Mul_296" + input: "Mul_297" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_298/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_298" + op: "Mul" + input: "Mul_298/x" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_54" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_299/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_299" + op: "Mul" + input: "Mul_299/x" + input: "Square_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_188" + op: "Add" + input: "Mul_298" + input: "Mul_299" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_54" + op: "Sqrt" + input: "add_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_189/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_189" + op: "Add" + input: "Sqrt_54" + input: "add_189/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_55" + op: "RealDiv" + input: "add_187" + input: "add_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_300" + op: "Mul" + input: "add_2" + input: "truediv_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_55" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/query/bias/read" + input: "mul_300" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_368" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "sub_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_369" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "add_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_370" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "add_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_301/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_301" + op: "Mul" + input: "Mul_301/x" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_302/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_302" + op: "Mul" + input: "Mul_302/x" + input: "clip_by_global_norm/clip_by_global_norm/_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_190" + op: "Add" + input: "Mul_301" + input: "Mul_302" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_303/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_303" + op: "Mul" + input: "Mul_303/x" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_55" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_304/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_304" + op: "Mul" + input: "Mul_304/x" + input: "Square_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_191" + op: "Add" + input: "Mul_303" + input: "Mul_304" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_55" + op: "Sqrt" + input: "add_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_192/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_192" + op: "Add" + input: "Sqrt_55" + input: "add_192/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_56" + op: "RealDiv" + input: "add_190" + input: "add_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_305/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_305" + op: "Mul" + input: "mul_305/x" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_193" + op: "Add" + input: "truediv_56" + input: "mul_305" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_306" + op: "Mul" + input: "add_2" + input: "add_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_56" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/key/kernel/read" + input: "mul_306" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_371" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "sub_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_372" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: "add_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_373" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "add_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_307/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_307" + op: "Mul" + input: "Mul_307/x" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_308/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_308" + op: "Mul" + input: "Mul_308/x" + input: "clip_by_global_norm/clip_by_global_norm/_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_194" + op: "Add" + input: "Mul_307" + input: "Mul_308" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_309/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_309" + op: "Mul" + input: "Mul_309/x" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_56" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_310/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_310" + op: "Mul" + input: "Mul_310/x" + input: "Square_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_195" + op: "Add" + input: "Mul_309" + input: "Mul_310" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_56" + op: "Sqrt" + input: "add_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_196/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_196" + op: "Add" + input: "Sqrt_56" + input: "add_196/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_57" + op: "RealDiv" + input: "add_194" + input: "add_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_311" + op: "Mul" + input: "add_2" + input: "truediv_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_57" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/key/bias/read" + input: "mul_311" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_374" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "sub_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_375" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "add_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_376" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "add_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_312/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_312" + op: "Mul" + input: "Mul_312/x" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_313/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_313" + op: "Mul" + input: "Mul_313/x" + input: "clip_by_global_norm/clip_by_global_norm/_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_197" + op: "Add" + input: "Mul_312" + input: "Mul_313" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_314/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_314" + op: "Mul" + input: "Mul_314/x" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_57" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_315/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_315" + op: "Mul" + input: "Mul_315/x" + input: "Square_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_198" + op: "Add" + input: "Mul_314" + input: "Mul_315" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_57" + op: "Sqrt" + input: "add_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_199/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_199" + op: "Add" + input: "Sqrt_57" + input: "add_199/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_58" + op: "RealDiv" + input: "add_197" + input: "add_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_316/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_316" + op: "Mul" + input: "mul_316/x" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_200" + op: "Add" + input: "truediv_58" + input: "mul_316" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_317" + op: "Mul" + input: "add_2" + input: "add_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_58" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/value/kernel/read" + input: "mul_317" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_377" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "sub_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_378" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: "add_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_379" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: "add_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_318/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_318" + op: "Mul" + input: "Mul_318/x" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_319/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_319" + op: "Mul" + input: "Mul_319/x" + input: "clip_by_global_norm/clip_by_global_norm/_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_201" + op: "Add" + input: "Mul_318" + input: "Mul_319" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_320/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_320" + op: "Mul" + input: "Mul_320/x" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_58" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_321/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_321" + op: "Mul" + input: "Mul_321/x" + input: "Square_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_202" + op: "Add" + input: "Mul_320" + input: "Mul_321" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_58" + op: "Sqrt" + input: "add_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_203/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_203" + op: "Add" + input: "Sqrt_58" + input: "add_203/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_59" + op: "RealDiv" + input: "add_201" + input: "add_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_322" + op: "Mul" + input: "add_2" + input: "truediv_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_59" + op: "Sub" + input: "bert/encoder/layer_3/attention/self/value/bias/read" + input: "mul_322" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_380" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "sub_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_381" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "add_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_382" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "add_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_323/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_323" + op: "Mul" + input: "Mul_323/x" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_324/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_324" + op: "Mul" + input: "Mul_324/x" + input: "clip_by_global_norm/clip_by_global_norm/_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_204" + op: "Add" + input: "Mul_323" + input: "Mul_324" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_325/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_325" + op: "Mul" + input: "Mul_325/x" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_59" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_326/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_326" + op: "Mul" + input: "Mul_326/x" + input: "Square_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_205" + op: "Add" + input: "Mul_325" + input: "Mul_326" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_59" + op: "Sqrt" + input: "add_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_206/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_206" + op: "Add" + input: "Sqrt_59" + input: "add_206/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_60" + op: "RealDiv" + input: "add_204" + input: "add_206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_327/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_327" + op: "Mul" + input: "mul_327/x" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_207" + op: "Add" + input: "truediv_60" + input: "mul_327" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_328" + op: "Mul" + input: "add_2" + input: "add_207" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_60" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/dense/kernel/read" + input: "mul_328" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_383" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "sub_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_384" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "add_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_385" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "add_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_329/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_329" + op: "Mul" + input: "Mul_329/x" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_330/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_330" + op: "Mul" + input: "Mul_330/x" + input: "clip_by_global_norm/clip_by_global_norm/_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_208" + op: "Add" + input: "Mul_329" + input: "Mul_330" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_331/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_331" + op: "Mul" + input: "Mul_331/x" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_60" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_332/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_332" + op: "Mul" + input: "Mul_332/x" + input: "Square_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_209" + op: "Add" + input: "Mul_331" + input: "Mul_332" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_60" + op: "Sqrt" + input: "add_209" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_210/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_210" + op: "Add" + input: "Sqrt_60" + input: "add_210/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_61" + op: "RealDiv" + input: "add_208" + input: "add_210" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_333" + op: "Mul" + input: "add_2" + input: "truediv_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_61" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/dense/bias/read" + input: "mul_333" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_386" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "sub_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_387" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "add_208" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_388" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "add_209" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_334/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_334" + op: "Mul" + input: "Mul_334/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_335/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_335" + op: "Mul" + input: "Mul_335/x" + input: "clip_by_global_norm/clip_by_global_norm/_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_211" + op: "Add" + input: "Mul_334" + input: "Mul_335" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_336/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_336" + op: "Mul" + input: "Mul_336/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_61" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_337/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_337" + op: "Mul" + input: "Mul_337/x" + input: "Square_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_212" + op: "Add" + input: "Mul_336" + input: "Mul_337" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_61" + op: "Sqrt" + input: "add_212" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_213/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_213" + op: "Add" + input: "Sqrt_61" + input: "add_213/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_62" + op: "RealDiv" + input: "add_211" + input: "add_213" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_338" + op: "Mul" + input: "add_2" + input: "truediv_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_62" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/read" + input: "mul_338" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_389" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "sub_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_390" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "add_211" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_391" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "add_212" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_339/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_339" + op: "Mul" + input: "Mul_339/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_340/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_340" + op: "Mul" + input: "Mul_340/x" + input: "clip_by_global_norm/clip_by_global_norm/_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_214" + op: "Add" + input: "Mul_339" + input: "Mul_340" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_341/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_341" + op: "Mul" + input: "Mul_341/x" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_62" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_342/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_342" + op: "Mul" + input: "Mul_342/x" + input: "Square_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_215" + op: "Add" + input: "Mul_341" + input: "Mul_342" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_62" + op: "Sqrt" + input: "add_215" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_216/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_216" + op: "Add" + input: "Sqrt_62" + input: "add_216/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_63" + op: "RealDiv" + input: "add_214" + input: "add_216" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_343" + op: "Mul" + input: "add_2" + input: "truediv_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_63" + op: "Sub" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/read" + input: "mul_343" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_392" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "sub_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_393" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "add_214" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_394" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "add_215" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_344/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_344" + op: "Mul" + input: "Mul_344/x" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_345/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_345" + op: "Mul" + input: "Mul_345/x" + input: "clip_by_global_norm/clip_by_global_norm/_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_217" + op: "Add" + input: "Mul_344" + input: "Mul_345" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_346/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_346" + op: "Mul" + input: "Mul_346/x" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_63" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_347/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_347" + op: "Mul" + input: "Mul_347/x" + input: "Square_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_218" + op: "Add" + input: "Mul_346" + input: "Mul_347" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_63" + op: "Sqrt" + input: "add_218" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_219/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_219" + op: "Add" + input: "Sqrt_63" + input: "add_219/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_64" + op: "RealDiv" + input: "add_217" + input: "add_219" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_348/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_348" + op: "Mul" + input: "mul_348/x" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_220" + op: "Add" + input: "truediv_64" + input: "mul_348" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_349" + op: "Mul" + input: "add_2" + input: "add_220" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_64" + op: "Sub" + input: "bert/encoder/layer_3/intermediate/dense/kernel/read" + input: "mul_349" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_395" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "sub_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_396" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "add_217" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_397" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "add_218" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_350/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_350" + op: "Mul" + input: "Mul_350/x" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_351/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_351" + op: "Mul" + input: "Mul_351/x" + input: "clip_by_global_norm/clip_by_global_norm/_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_221" + op: "Add" + input: "Mul_350" + input: "Mul_351" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_352/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_352" + op: "Mul" + input: "Mul_352/x" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_64" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_353/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_353" + op: "Mul" + input: "Mul_353/x" + input: "Square_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_222" + op: "Add" + input: "Mul_352" + input: "Mul_353" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_64" + op: "Sqrt" + input: "add_222" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_223/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_223" + op: "Add" + input: "Sqrt_64" + input: "add_223/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_65" + op: "RealDiv" + input: "add_221" + input: "add_223" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_354" + op: "Mul" + input: "add_2" + input: "truediv_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_65" + op: "Sub" + input: "bert/encoder/layer_3/intermediate/dense/bias/read" + input: "mul_354" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_398" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "sub_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_399" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "add_221" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_400" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: "add_222" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_355/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_355" + op: "Mul" + input: "Mul_355/x" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_356/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_356" + op: "Mul" + input: "Mul_356/x" + input: "clip_by_global_norm/clip_by_global_norm/_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_224" + op: "Add" + input: "Mul_355" + input: "Mul_356" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_357/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_357" + op: "Mul" + input: "Mul_357/x" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_65" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_358/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_358" + op: "Mul" + input: "Mul_358/x" + input: "Square_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_225" + op: "Add" + input: "Mul_357" + input: "Mul_358" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_65" + op: "Sqrt" + input: "add_225" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_226/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_226" + op: "Add" + input: "Sqrt_65" + input: "add_226/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_66" + op: "RealDiv" + input: "add_224" + input: "add_226" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_359/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_359" + op: "Mul" + input: "mul_359/x" + input: "bert/encoder/layer_3/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_227" + op: "Add" + input: "truediv_66" + input: "mul_359" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_360" + op: "Mul" + input: "add_2" + input: "add_227" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_66" + op: "Sub" + input: "bert/encoder/layer_3/output/dense/kernel/read" + input: "mul_360" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_401" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "sub_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_402" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "add_224" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_403" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "add_225" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_361/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_361" + op: "Mul" + input: "Mul_361/x" + input: "bert/encoder/layer_3/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_362/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_362" + op: "Mul" + input: "Mul_362/x" + input: "clip_by_global_norm/clip_by_global_norm/_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_228" + op: "Add" + input: "Mul_361" + input: "Mul_362" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_363/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_363" + op: "Mul" + input: "Mul_363/x" + input: "bert/encoder/layer_3/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_66" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_364/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_364" + op: "Mul" + input: "Mul_364/x" + input: "Square_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_229" + op: "Add" + input: "Mul_363" + input: "Mul_364" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_66" + op: "Sqrt" + input: "add_229" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_230/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_230" + op: "Add" + input: "Sqrt_66" + input: "add_230/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_67" + op: "RealDiv" + input: "add_228" + input: "add_230" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_365" + op: "Mul" + input: "add_2" + input: "truediv_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_67" + op: "Sub" + input: "bert/encoder/layer_3/output/dense/bias/read" + input: "mul_365" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_404" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "sub_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_405" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "add_228" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_406" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "add_229" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_366/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_366" + op: "Mul" + input: "Mul_366/x" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_367/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_367" + op: "Mul" + input: "Mul_367/x" + input: "clip_by_global_norm/clip_by_global_norm/_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_231" + op: "Add" + input: "Mul_366" + input: "Mul_367" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_368/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_368" + op: "Mul" + input: "Mul_368/x" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_67" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_369/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_369" + op: "Mul" + input: "Mul_369/x" + input: "Square_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_232" + op: "Add" + input: "Mul_368" + input: "Mul_369" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_67" + op: "Sqrt" + input: "add_232" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_233/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_233" + op: "Add" + input: "Sqrt_67" + input: "add_233/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_68" + op: "RealDiv" + input: "add_231" + input: "add_233" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_370" + op: "Mul" + input: "add_2" + input: "truediv_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_68" + op: "Sub" + input: "bert/encoder/layer_3/output/LayerNorm/beta/read" + input: "mul_370" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_407" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "sub_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_408" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "add_231" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_409" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "add_232" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_371/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_371" + op: "Mul" + input: "Mul_371/x" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_372/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_372" + op: "Mul" + input: "Mul_372/x" + input: "clip_by_global_norm/clip_by_global_norm/_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_234" + op: "Add" + input: "Mul_371" + input: "Mul_372" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_373/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_373" + op: "Mul" + input: "Mul_373/x" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_68" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_374/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_374" + op: "Mul" + input: "Mul_374/x" + input: "Square_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_235" + op: "Add" + input: "Mul_373" + input: "Mul_374" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_68" + op: "Sqrt" + input: "add_235" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_236/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_236" + op: "Add" + input: "Sqrt_68" + input: "add_236/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_69" + op: "RealDiv" + input: "add_234" + input: "add_236" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_375" + op: "Mul" + input: "add_2" + input: "truediv_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_69" + op: "Sub" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/read" + input: "mul_375" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_410" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "sub_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_411" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "add_234" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_412" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: "add_235" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_376/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_376" + op: "Mul" + input: "Mul_376/x" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_377/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_377" + op: "Mul" + input: "Mul_377/x" + input: "clip_by_global_norm/clip_by_global_norm/_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_237" + op: "Add" + input: "Mul_376" + input: "Mul_377" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_378/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_378" + op: "Mul" + input: "Mul_378/x" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_69" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_379/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_379" + op: "Mul" + input: "Mul_379/x" + input: "Square_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_238" + op: "Add" + input: "Mul_378" + input: "Mul_379" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_69" + op: "Sqrt" + input: "add_238" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_239/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_239" + op: "Add" + input: "Sqrt_69" + input: "add_239/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_70" + op: "RealDiv" + input: "add_237" + input: "add_239" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_380/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_380" + op: "Mul" + input: "mul_380/x" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_240" + op: "Add" + input: "truediv_70" + input: "mul_380" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_381" + op: "Mul" + input: "add_2" + input: "add_240" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_70" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/query/kernel/read" + input: "mul_381" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_413" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "sub_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_414" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "add_237" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_415" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "add_238" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_382/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_382" + op: "Mul" + input: "Mul_382/x" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_383/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_383" + op: "Mul" + input: "Mul_383/x" + input: "clip_by_global_norm/clip_by_global_norm/_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_241" + op: "Add" + input: "Mul_382" + input: "Mul_383" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_384/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_384" + op: "Mul" + input: "Mul_384/x" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_70" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_385/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_385" + op: "Mul" + input: "Mul_385/x" + input: "Square_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_242" + op: "Add" + input: "Mul_384" + input: "Mul_385" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_70" + op: "Sqrt" + input: "add_242" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_243/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_243" + op: "Add" + input: "Sqrt_70" + input: "add_243/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_71" + op: "RealDiv" + input: "add_241" + input: "add_243" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_386" + op: "Mul" + input: "add_2" + input: "truediv_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_71" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/query/bias/read" + input: "mul_386" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_416" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "sub_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_417" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "add_241" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_418" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "add_242" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_387/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_387" + op: "Mul" + input: "Mul_387/x" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_388/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_388" + op: "Mul" + input: "Mul_388/x" + input: "clip_by_global_norm/clip_by_global_norm/_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_244" + op: "Add" + input: "Mul_387" + input: "Mul_388" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_389/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_389" + op: "Mul" + input: "Mul_389/x" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_71" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_390/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_390" + op: "Mul" + input: "Mul_390/x" + input: "Square_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_245" + op: "Add" + input: "Mul_389" + input: "Mul_390" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_71" + op: "Sqrt" + input: "add_245" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_246/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_246" + op: "Add" + input: "Sqrt_71" + input: "add_246/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_72" + op: "RealDiv" + input: "add_244" + input: "add_246" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_391/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_391" + op: "Mul" + input: "mul_391/x" + input: "bert/encoder/layer_4/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_247" + op: "Add" + input: "truediv_72" + input: "mul_391" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_392" + op: "Mul" + input: "add_2" + input: "add_247" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_72" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/key/kernel/read" + input: "mul_392" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_419" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "sub_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_420" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "add_244" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_421" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "add_245" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_393/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_393" + op: "Mul" + input: "Mul_393/x" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_394/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_394" + op: "Mul" + input: "Mul_394/x" + input: "clip_by_global_norm/clip_by_global_norm/_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_248" + op: "Add" + input: "Mul_393" + input: "Mul_394" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_395/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_395" + op: "Mul" + input: "Mul_395/x" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_72" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_396/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_396" + op: "Mul" + input: "Mul_396/x" + input: "Square_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_249" + op: "Add" + input: "Mul_395" + input: "Mul_396" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_72" + op: "Sqrt" + input: "add_249" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_250/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_250" + op: "Add" + input: "Sqrt_72" + input: "add_250/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_73" + op: "RealDiv" + input: "add_248" + input: "add_250" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_397" + op: "Mul" + input: "add_2" + input: "truediv_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_73" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/key/bias/read" + input: "mul_397" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_422" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "sub_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_423" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "add_248" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_424" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "add_249" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_398/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_398" + op: "Mul" + input: "Mul_398/x" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_399/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_399" + op: "Mul" + input: "Mul_399/x" + input: "clip_by_global_norm/clip_by_global_norm/_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_251" + op: "Add" + input: "Mul_398" + input: "Mul_399" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_400/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_400" + op: "Mul" + input: "Mul_400/x" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_73" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_401/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_401" + op: "Mul" + input: "Mul_401/x" + input: "Square_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_252" + op: "Add" + input: "Mul_400" + input: "Mul_401" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_73" + op: "Sqrt" + input: "add_252" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_253/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_253" + op: "Add" + input: "Sqrt_73" + input: "add_253/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_74" + op: "RealDiv" + input: "add_251" + input: "add_253" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_402/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_402" + op: "Mul" + input: "mul_402/x" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_254" + op: "Add" + input: "truediv_74" + input: "mul_402" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_403" + op: "Mul" + input: "add_2" + input: "add_254" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_74" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/value/kernel/read" + input: "mul_403" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_425" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "sub_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_426" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "add_251" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_427" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "add_252" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_404/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_404" + op: "Mul" + input: "Mul_404/x" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_405/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_405" + op: "Mul" + input: "Mul_405/x" + input: "clip_by_global_norm/clip_by_global_norm/_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_255" + op: "Add" + input: "Mul_404" + input: "Mul_405" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_406/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_406" + op: "Mul" + input: "Mul_406/x" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_74" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_407/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_407" + op: "Mul" + input: "Mul_407/x" + input: "Square_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_256" + op: "Add" + input: "Mul_406" + input: "Mul_407" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_74" + op: "Sqrt" + input: "add_256" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_257/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_257" + op: "Add" + input: "Sqrt_74" + input: "add_257/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_75" + op: "RealDiv" + input: "add_255" + input: "add_257" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_408" + op: "Mul" + input: "add_2" + input: "truediv_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_75" + op: "Sub" + input: "bert/encoder/layer_4/attention/self/value/bias/read" + input: "mul_408" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_428" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "sub_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_429" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "add_255" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_430" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "add_256" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_409/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_409" + op: "Mul" + input: "Mul_409/x" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_410/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_410" + op: "Mul" + input: "Mul_410/x" + input: "clip_by_global_norm/clip_by_global_norm/_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_258" + op: "Add" + input: "Mul_409" + input: "Mul_410" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_411/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_411" + op: "Mul" + input: "Mul_411/x" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_75" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_412/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_412" + op: "Mul" + input: "Mul_412/x" + input: "Square_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_259" + op: "Add" + input: "Mul_411" + input: "Mul_412" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_75" + op: "Sqrt" + input: "add_259" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_260/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_260" + op: "Add" + input: "Sqrt_75" + input: "add_260/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_76" + op: "RealDiv" + input: "add_258" + input: "add_260" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_413/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_413" + op: "Mul" + input: "mul_413/x" + input: "bert/encoder/layer_4/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_261" + op: "Add" + input: "truediv_76" + input: "mul_413" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_414" + op: "Mul" + input: "add_2" + input: "add_261" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_76" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/dense/kernel/read" + input: "mul_414" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_431" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "sub_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_432" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "add_258" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_433" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "add_259" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_415/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_415" + op: "Mul" + input: "Mul_415/x" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_416/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_416" + op: "Mul" + input: "Mul_416/x" + input: "clip_by_global_norm/clip_by_global_norm/_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_262" + op: "Add" + input: "Mul_415" + input: "Mul_416" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_417/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_417" + op: "Mul" + input: "Mul_417/x" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_76" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_418/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_418" + op: "Mul" + input: "Mul_418/x" + input: "Square_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_263" + op: "Add" + input: "Mul_417" + input: "Mul_418" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_76" + op: "Sqrt" + input: "add_263" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_264/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_264" + op: "Add" + input: "Sqrt_76" + input: "add_264/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_77" + op: "RealDiv" + input: "add_262" + input: "add_264" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_419" + op: "Mul" + input: "add_2" + input: "truediv_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_77" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/dense/bias/read" + input: "mul_419" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_434" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "sub_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_435" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "add_262" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_436" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "add_263" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_420/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_420" + op: "Mul" + input: "Mul_420/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_421/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_421" + op: "Mul" + input: "Mul_421/x" + input: "clip_by_global_norm/clip_by_global_norm/_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_265" + op: "Add" + input: "Mul_420" + input: "Mul_421" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_422/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_422" + op: "Mul" + input: "Mul_422/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_77" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_423/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_423" + op: "Mul" + input: "Mul_423/x" + input: "Square_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_266" + op: "Add" + input: "Mul_422" + input: "Mul_423" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_77" + op: "Sqrt" + input: "add_266" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_267/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_267" + op: "Add" + input: "Sqrt_77" + input: "add_267/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_78" + op: "RealDiv" + input: "add_265" + input: "add_267" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_424" + op: "Mul" + input: "add_2" + input: "truediv_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_78" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/read" + input: "mul_424" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_437" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "sub_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_438" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "add_265" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_439" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: "add_266" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_425/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_425" + op: "Mul" + input: "Mul_425/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_426/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_426" + op: "Mul" + input: "Mul_426/x" + input: "clip_by_global_norm/clip_by_global_norm/_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_268" + op: "Add" + input: "Mul_425" + input: "Mul_426" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_427/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_427" + op: "Mul" + input: "Mul_427/x" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_78" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_428/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_428" + op: "Mul" + input: "Mul_428/x" + input: "Square_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_269" + op: "Add" + input: "Mul_427" + input: "Mul_428" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_78" + op: "Sqrt" + input: "add_269" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_270/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_270" + op: "Add" + input: "Sqrt_78" + input: "add_270/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_79" + op: "RealDiv" + input: "add_268" + input: "add_270" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_429" + op: "Mul" + input: "add_2" + input: "truediv_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_79" + op: "Sub" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/read" + input: "mul_429" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_440" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "sub_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_441" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "add_268" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_442" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "add_269" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_430/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_430" + op: "Mul" + input: "Mul_430/x" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_431/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_431" + op: "Mul" + input: "Mul_431/x" + input: "clip_by_global_norm/clip_by_global_norm/_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_271" + op: "Add" + input: "Mul_430" + input: "Mul_431" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_432/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_432" + op: "Mul" + input: "Mul_432/x" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_79" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_433/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_433" + op: "Mul" + input: "Mul_433/x" + input: "Square_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_272" + op: "Add" + input: "Mul_432" + input: "Mul_433" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_79" + op: "Sqrt" + input: "add_272" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_273/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_273" + op: "Add" + input: "Sqrt_79" + input: "add_273/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_80" + op: "RealDiv" + input: "add_271" + input: "add_273" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_434/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_434" + op: "Mul" + input: "mul_434/x" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_274" + op: "Add" + input: "truediv_80" + input: "mul_434" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_435" + op: "Mul" + input: "add_2" + input: "add_274" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_80" + op: "Sub" + input: "bert/encoder/layer_4/intermediate/dense/kernel/read" + input: "mul_435" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_443" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "sub_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_444" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "add_271" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_445" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "add_272" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_436/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_436" + op: "Mul" + input: "Mul_436/x" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_437/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_437" + op: "Mul" + input: "Mul_437/x" + input: "clip_by_global_norm/clip_by_global_norm/_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_275" + op: "Add" + input: "Mul_436" + input: "Mul_437" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_438/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_438" + op: "Mul" + input: "Mul_438/x" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_80" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_439/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_439" + op: "Mul" + input: "Mul_439/x" + input: "Square_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_276" + op: "Add" + input: "Mul_438" + input: "Mul_439" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_80" + op: "Sqrt" + input: "add_276" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_277/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_277" + op: "Add" + input: "Sqrt_80" + input: "add_277/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_81" + op: "RealDiv" + input: "add_275" + input: "add_277" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_440" + op: "Mul" + input: "add_2" + input: "truediv_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_81" + op: "Sub" + input: "bert/encoder/layer_4/intermediate/dense/bias/read" + input: "mul_440" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_446" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "sub_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_447" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "add_275" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_448" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "add_276" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_441/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_441" + op: "Mul" + input: "Mul_441/x" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_442/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_442" + op: "Mul" + input: "Mul_442/x" + input: "clip_by_global_norm/clip_by_global_norm/_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_278" + op: "Add" + input: "Mul_441" + input: "Mul_442" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_443/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_443" + op: "Mul" + input: "Mul_443/x" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_81" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_444/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_444" + op: "Mul" + input: "Mul_444/x" + input: "Square_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_279" + op: "Add" + input: "Mul_443" + input: "Mul_444" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_81" + op: "Sqrt" + input: "add_279" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_280/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_280" + op: "Add" + input: "Sqrt_81" + input: "add_280/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_82" + op: "RealDiv" + input: "add_278" + input: "add_280" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_445/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_445" + op: "Mul" + input: "mul_445/x" + input: "bert/encoder/layer_4/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_281" + op: "Add" + input: "truediv_82" + input: "mul_445" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_446" + op: "Mul" + input: "add_2" + input: "add_281" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_82" + op: "Sub" + input: "bert/encoder/layer_4/output/dense/kernel/read" + input: "mul_446" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_449" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "sub_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_450" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "add_278" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_451" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "add_279" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: "bert/encoder/layer_4/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_447/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_447" + op: "Mul" + input: "Mul_447/x" + input: "bert/encoder/layer_4/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_448/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_448" + op: "Mul" + input: "Mul_448/x" + input: "clip_by_global_norm/clip_by_global_norm/_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_282" + op: "Add" + input: "Mul_447" + input: "Mul_448" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_449/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_449" + op: "Mul" + input: "Mul_449/x" + input: "bert/encoder/layer_4/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_82" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_450/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_450" + op: "Mul" + input: "Mul_450/x" + input: "Square_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_283" + op: "Add" + input: "Mul_449" + input: "Mul_450" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_82" + op: "Sqrt" + input: "add_283" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_284/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_284" + op: "Add" + input: "Sqrt_82" + input: "add_284/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_83" + op: "RealDiv" + input: "add_282" + input: "add_284" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_451" + op: "Mul" + input: "add_2" + input: "truediv_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_83" + op: "Sub" + input: "bert/encoder/layer_4/output/dense/bias/read" + input: "mul_451" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_452" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "sub_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_453" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: "add_282" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_454" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "add_283" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_452/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_452" + op: "Mul" + input: "Mul_452/x" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_453/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_453" + op: "Mul" + input: "Mul_453/x" + input: "clip_by_global_norm/clip_by_global_norm/_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_285" + op: "Add" + input: "Mul_452" + input: "Mul_453" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_454/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_454" + op: "Mul" + input: "Mul_454/x" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_83" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_455/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_455" + op: "Mul" + input: "Mul_455/x" + input: "Square_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_286" + op: "Add" + input: "Mul_454" + input: "Mul_455" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_83" + op: "Sqrt" + input: "add_286" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_287/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_287" + op: "Add" + input: "Sqrt_83" + input: "add_287/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_84" + op: "RealDiv" + input: "add_285" + input: "add_287" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_456" + op: "Mul" + input: "add_2" + input: "truediv_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_84" + op: "Sub" + input: "bert/encoder/layer_4/output/LayerNorm/beta/read" + input: "mul_456" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_455" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "sub_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_456" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "add_285" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_457" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "add_286" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_457/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_457" + op: "Mul" + input: "Mul_457/x" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_458/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_458" + op: "Mul" + input: "Mul_458/x" + input: "clip_by_global_norm/clip_by_global_norm/_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_288" + op: "Add" + input: "Mul_457" + input: "Mul_458" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_459/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_459" + op: "Mul" + input: "Mul_459/x" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_84" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_460/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_460" + op: "Mul" + input: "Mul_460/x" + input: "Square_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_289" + op: "Add" + input: "Mul_459" + input: "Mul_460" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_84" + op: "Sqrt" + input: "add_289" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_290/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_290" + op: "Add" + input: "Sqrt_84" + input: "add_290/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_85" + op: "RealDiv" + input: "add_288" + input: "add_290" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_461" + op: "Mul" + input: "add_2" + input: "truediv_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_85" + op: "Sub" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/read" + input: "mul_461" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_458" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "sub_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_459" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "add_288" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_460" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "add_289" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_462/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_462" + op: "Mul" + input: "Mul_462/x" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_463/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_463" + op: "Mul" + input: "Mul_463/x" + input: "clip_by_global_norm/clip_by_global_norm/_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_291" + op: "Add" + input: "Mul_462" + input: "Mul_463" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_464/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_464" + op: "Mul" + input: "Mul_464/x" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_85" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_465/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_465" + op: "Mul" + input: "Mul_465/x" + input: "Square_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_292" + op: "Add" + input: "Mul_464" + input: "Mul_465" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_85" + op: "Sqrt" + input: "add_292" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_293/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_293" + op: "Add" + input: "Sqrt_85" + input: "add_293/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_86" + op: "RealDiv" + input: "add_291" + input: "add_293" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_466/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_466" + op: "Mul" + input: "mul_466/x" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_294" + op: "Add" + input: "truediv_86" + input: "mul_466" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_467" + op: "Mul" + input: "add_2" + input: "add_294" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_86" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/query/kernel/read" + input: "mul_467" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_461" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "sub_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_462" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "add_291" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_463" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "add_292" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_468/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_468" + op: "Mul" + input: "Mul_468/x" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_469/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_469" + op: "Mul" + input: "Mul_469/x" + input: "clip_by_global_norm/clip_by_global_norm/_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_295" + op: "Add" + input: "Mul_468" + input: "Mul_469" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_470/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_470" + op: "Mul" + input: "Mul_470/x" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_86" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_471/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_471" + op: "Mul" + input: "Mul_471/x" + input: "Square_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_296" + op: "Add" + input: "Mul_470" + input: "Mul_471" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_86" + op: "Sqrt" + input: "add_296" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_297/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_297" + op: "Add" + input: "Sqrt_86" + input: "add_297/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_87" + op: "RealDiv" + input: "add_295" + input: "add_297" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_472" + op: "Mul" + input: "add_2" + input: "truediv_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_87" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/query/bias/read" + input: "mul_472" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_464" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "sub_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_465" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "add_295" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_466" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "add_296" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_473/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_473" + op: "Mul" + input: "Mul_473/x" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_474/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_474" + op: "Mul" + input: "Mul_474/x" + input: "clip_by_global_norm/clip_by_global_norm/_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_298" + op: "Add" + input: "Mul_473" + input: "Mul_474" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_475/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_475" + op: "Mul" + input: "Mul_475/x" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_87" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_476/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_476" + op: "Mul" + input: "Mul_476/x" + input: "Square_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_299" + op: "Add" + input: "Mul_475" + input: "Mul_476" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_87" + op: "Sqrt" + input: "add_299" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_300/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_300" + op: "Add" + input: "Sqrt_87" + input: "add_300/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_88" + op: "RealDiv" + input: "add_298" + input: "add_300" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_477/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_477" + op: "Mul" + input: "mul_477/x" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_301" + op: "Add" + input: "truediv_88" + input: "mul_477" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_478" + op: "Mul" + input: "add_2" + input: "add_301" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_88" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/key/kernel/read" + input: "mul_478" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_467" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "sub_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_468" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "add_298" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_469" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "add_299" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_479/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_479" + op: "Mul" + input: "Mul_479/x" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_480/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_480" + op: "Mul" + input: "Mul_480/x" + input: "clip_by_global_norm/clip_by_global_norm/_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_302" + op: "Add" + input: "Mul_479" + input: "Mul_480" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_481/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_481" + op: "Mul" + input: "Mul_481/x" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_88" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_482/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_482" + op: "Mul" + input: "Mul_482/x" + input: "Square_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_303" + op: "Add" + input: "Mul_481" + input: "Mul_482" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_88" + op: "Sqrt" + input: "add_303" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_304/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_304" + op: "Add" + input: "Sqrt_88" + input: "add_304/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_89" + op: "RealDiv" + input: "add_302" + input: "add_304" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_483" + op: "Mul" + input: "add_2" + input: "truediv_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_89" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/key/bias/read" + input: "mul_483" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_470" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "sub_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_471" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "add_302" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_472" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "add_303" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_484/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_484" + op: "Mul" + input: "Mul_484/x" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_485/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_485" + op: "Mul" + input: "Mul_485/x" + input: "clip_by_global_norm/clip_by_global_norm/_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_305" + op: "Add" + input: "Mul_484" + input: "Mul_485" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_486/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_486" + op: "Mul" + input: "Mul_486/x" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_89" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_487/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_487" + op: "Mul" + input: "Mul_487/x" + input: "Square_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_306" + op: "Add" + input: "Mul_486" + input: "Mul_487" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_89" + op: "Sqrt" + input: "add_306" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_307/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_307" + op: "Add" + input: "Sqrt_89" + input: "add_307/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_90" + op: "RealDiv" + input: "add_305" + input: "add_307" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_488/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_488" + op: "Mul" + input: "mul_488/x" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_308" + op: "Add" + input: "truediv_90" + input: "mul_488" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_489" + op: "Mul" + input: "add_2" + input: "add_308" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_90" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/value/kernel/read" + input: "mul_489" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_473" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "sub_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_474" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "add_305" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_475" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "add_306" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_490/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_490" + op: "Mul" + input: "Mul_490/x" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_491/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_491" + op: "Mul" + input: "Mul_491/x" + input: "clip_by_global_norm/clip_by_global_norm/_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_309" + op: "Add" + input: "Mul_490" + input: "Mul_491" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_492/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_492" + op: "Mul" + input: "Mul_492/x" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_90" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_493/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_493" + op: "Mul" + input: "Mul_493/x" + input: "Square_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_310" + op: "Add" + input: "Mul_492" + input: "Mul_493" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_90" + op: "Sqrt" + input: "add_310" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_311/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_311" + op: "Add" + input: "Sqrt_90" + input: "add_311/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_91" + op: "RealDiv" + input: "add_309" + input: "add_311" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_494" + op: "Mul" + input: "add_2" + input: "truediv_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_91" + op: "Sub" + input: "bert/encoder/layer_5/attention/self/value/bias/read" + input: "mul_494" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_476" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "sub_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_477" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "add_309" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_478" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "add_310" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_495/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_495" + op: "Mul" + input: "Mul_495/x" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_496/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_496" + op: "Mul" + input: "Mul_496/x" + input: "clip_by_global_norm/clip_by_global_norm/_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_312" + op: "Add" + input: "Mul_495" + input: "Mul_496" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_497/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_497" + op: "Mul" + input: "Mul_497/x" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_91" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_498/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_498" + op: "Mul" + input: "Mul_498/x" + input: "Square_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_313" + op: "Add" + input: "Mul_497" + input: "Mul_498" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_91" + op: "Sqrt" + input: "add_313" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_314/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_314" + op: "Add" + input: "Sqrt_91" + input: "add_314/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_92" + op: "RealDiv" + input: "add_312" + input: "add_314" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_499/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_499" + op: "Mul" + input: "mul_499/x" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_315" + op: "Add" + input: "truediv_92" + input: "mul_499" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_500" + op: "Mul" + input: "add_2" + input: "add_315" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_92" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/dense/kernel/read" + input: "mul_500" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_479" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "sub_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_480" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "add_312" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_481" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "add_313" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_501/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_501" + op: "Mul" + input: "Mul_501/x" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_502/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_502" + op: "Mul" + input: "Mul_502/x" + input: "clip_by_global_norm/clip_by_global_norm/_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_316" + op: "Add" + input: "Mul_501" + input: "Mul_502" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_503/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_503" + op: "Mul" + input: "Mul_503/x" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_92" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_504/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_504" + op: "Mul" + input: "Mul_504/x" + input: "Square_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_317" + op: "Add" + input: "Mul_503" + input: "Mul_504" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_92" + op: "Sqrt" + input: "add_317" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_318/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_318" + op: "Add" + input: "Sqrt_92" + input: "add_318/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_93" + op: "RealDiv" + input: "add_316" + input: "add_318" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_505" + op: "Mul" + input: "add_2" + input: "truediv_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_93" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/dense/bias/read" + input: "mul_505" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_482" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "sub_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_483" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "add_316" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_484" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "add_317" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_506/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_506" + op: "Mul" + input: "Mul_506/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_507/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_507" + op: "Mul" + input: "Mul_507/x" + input: "clip_by_global_norm/clip_by_global_norm/_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_319" + op: "Add" + input: "Mul_506" + input: "Mul_507" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_508/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_508" + op: "Mul" + input: "Mul_508/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_93" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_509/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_509" + op: "Mul" + input: "Mul_509/x" + input: "Square_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_320" + op: "Add" + input: "Mul_508" + input: "Mul_509" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_93" + op: "Sqrt" + input: "add_320" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_321/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_321" + op: "Add" + input: "Sqrt_93" + input: "add_321/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_94" + op: "RealDiv" + input: "add_319" + input: "add_321" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_510" + op: "Mul" + input: "add_2" + input: "truediv_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_94" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/read" + input: "mul_510" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_485" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "sub_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_486" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "add_319" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_487" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: "add_320" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_511/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_511" + op: "Mul" + input: "Mul_511/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_512/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_512" + op: "Mul" + input: "Mul_512/x" + input: "clip_by_global_norm/clip_by_global_norm/_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_322" + op: "Add" + input: "Mul_511" + input: "Mul_512" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_513/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_513" + op: "Mul" + input: "Mul_513/x" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_94" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_514/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_514" + op: "Mul" + input: "Mul_514/x" + input: "Square_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_323" + op: "Add" + input: "Mul_513" + input: "Mul_514" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_94" + op: "Sqrt" + input: "add_323" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_324/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_324" + op: "Add" + input: "Sqrt_94" + input: "add_324/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_95" + op: "RealDiv" + input: "add_322" + input: "add_324" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_515" + op: "Mul" + input: "add_2" + input: "truediv_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_95" + op: "Sub" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/read" + input: "mul_515" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_488" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "sub_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_489" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "add_322" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_490" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "add_323" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_516/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_516" + op: "Mul" + input: "Mul_516/x" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_517/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_517" + op: "Mul" + input: "Mul_517/x" + input: "clip_by_global_norm/clip_by_global_norm/_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_325" + op: "Add" + input: "Mul_516" + input: "Mul_517" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_518/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_518" + op: "Mul" + input: "Mul_518/x" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_95" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_519/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_519" + op: "Mul" + input: "Mul_519/x" + input: "Square_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_326" + op: "Add" + input: "Mul_518" + input: "Mul_519" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_95" + op: "Sqrt" + input: "add_326" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_327/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_327" + op: "Add" + input: "Sqrt_95" + input: "add_327/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_96" + op: "RealDiv" + input: "add_325" + input: "add_327" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_520/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_520" + op: "Mul" + input: "mul_520/x" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_328" + op: "Add" + input: "truediv_96" + input: "mul_520" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_521" + op: "Mul" + input: "add_2" + input: "add_328" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_96" + op: "Sub" + input: "bert/encoder/layer_5/intermediate/dense/kernel/read" + input: "mul_521" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_491" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "sub_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_492" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "add_325" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_493" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "add_326" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_522/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_522" + op: "Mul" + input: "Mul_522/x" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_523/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_523" + op: "Mul" + input: "Mul_523/x" + input: "clip_by_global_norm/clip_by_global_norm/_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_329" + op: "Add" + input: "Mul_522" + input: "Mul_523" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_524/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_524" + op: "Mul" + input: "Mul_524/x" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_96" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_525/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_525" + op: "Mul" + input: "Mul_525/x" + input: "Square_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_330" + op: "Add" + input: "Mul_524" + input: "Mul_525" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_96" + op: "Sqrt" + input: "add_330" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_331/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_331" + op: "Add" + input: "Sqrt_96" + input: "add_331/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_97" + op: "RealDiv" + input: "add_329" + input: "add_331" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_526" + op: "Mul" + input: "add_2" + input: "truediv_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_97" + op: "Sub" + input: "bert/encoder/layer_5/intermediate/dense/bias/read" + input: "mul_526" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_494" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "sub_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_495" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + input: "add_329" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_496" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "add_330" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_527/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_527" + op: "Mul" + input: "Mul_527/x" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_528/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_528" + op: "Mul" + input: "Mul_528/x" + input: "clip_by_global_norm/clip_by_global_norm/_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_332" + op: "Add" + input: "Mul_527" + input: "Mul_528" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_529/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_529" + op: "Mul" + input: "Mul_529/x" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_97" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_530/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_530" + op: "Mul" + input: "Mul_530/x" + input: "Square_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_333" + op: "Add" + input: "Mul_529" + input: "Mul_530" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_97" + op: "Sqrt" + input: "add_333" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_334/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_334" + op: "Add" + input: "Sqrt_97" + input: "add_334/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_98" + op: "RealDiv" + input: "add_332" + input: "add_334" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_531/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_531" + op: "Mul" + input: "mul_531/x" + input: "bert/encoder/layer_5/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_335" + op: "Add" + input: "truediv_98" + input: "mul_531" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_532" + op: "Mul" + input: "add_2" + input: "add_335" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_98" + op: "Sub" + input: "bert/encoder/layer_5/output/dense/kernel/read" + input: "mul_532" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_497" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "sub_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_498" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "add_332" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_499" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "add_333" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_533/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_533" + op: "Mul" + input: "Mul_533/x" + input: "bert/encoder/layer_5/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_534/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_534" + op: "Mul" + input: "Mul_534/x" + input: "clip_by_global_norm/clip_by_global_norm/_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_336" + op: "Add" + input: "Mul_533" + input: "Mul_534" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_535/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_535" + op: "Mul" + input: "Mul_535/x" + input: "bert/encoder/layer_5/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_98" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_536/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_536" + op: "Mul" + input: "Mul_536/x" + input: "Square_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_337" + op: "Add" + input: "Mul_535" + input: "Mul_536" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_98" + op: "Sqrt" + input: "add_337" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_338/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_338" + op: "Add" + input: "Sqrt_98" + input: "add_338/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_99" + op: "RealDiv" + input: "add_336" + input: "add_338" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_537" + op: "Mul" + input: "add_2" + input: "truediv_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_99" + op: "Sub" + input: "bert/encoder/layer_5/output/dense/bias/read" + input: "mul_537" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_500" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "sub_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_501" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "add_336" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_502" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "add_337" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_538/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_538" + op: "Mul" + input: "Mul_538/x" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_539/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_539" + op: "Mul" + input: "Mul_539/x" + input: "clip_by_global_norm/clip_by_global_norm/_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_339" + op: "Add" + input: "Mul_538" + input: "Mul_539" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_540/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_540" + op: "Mul" + input: "Mul_540/x" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_99" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_541/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_541" + op: "Mul" + input: "Mul_541/x" + input: "Square_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_340" + op: "Add" + input: "Mul_540" + input: "Mul_541" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_99" + op: "Sqrt" + input: "add_340" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_341/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_341" + op: "Add" + input: "Sqrt_99" + input: "add_341/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_100" + op: "RealDiv" + input: "add_339" + input: "add_341" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_542" + op: "Mul" + input: "add_2" + input: "truediv_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_100" + op: "Sub" + input: "bert/encoder/layer_5/output/LayerNorm/beta/read" + input: "mul_542" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_503" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "sub_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_504" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "add_339" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_505" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "add_340" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_543/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_543" + op: "Mul" + input: "Mul_543/x" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_544/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_544" + op: "Mul" + input: "Mul_544/x" + input: "clip_by_global_norm/clip_by_global_norm/_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_342" + op: "Add" + input: "Mul_543" + input: "Mul_544" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_545/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_545" + op: "Mul" + input: "Mul_545/x" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_100" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_546/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_546" + op: "Mul" + input: "Mul_546/x" + input: "Square_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_343" + op: "Add" + input: "Mul_545" + input: "Mul_546" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_100" + op: "Sqrt" + input: "add_343" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_344/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_344" + op: "Add" + input: "Sqrt_100" + input: "add_344/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_101" + op: "RealDiv" + input: "add_342" + input: "add_344" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_547" + op: "Mul" + input: "add_2" + input: "truediv_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_101" + op: "Sub" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/read" + input: "mul_547" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_506" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "sub_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_507" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "add_342" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_508" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "add_343" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_548/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_548" + op: "Mul" + input: "Mul_548/x" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_549/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_549" + op: "Mul" + input: "Mul_549/x" + input: "clip_by_global_norm/clip_by_global_norm/_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_345" + op: "Add" + input: "Mul_548" + input: "Mul_549" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_550/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_550" + op: "Mul" + input: "Mul_550/x" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_101" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_551/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_551" + op: "Mul" + input: "Mul_551/x" + input: "Square_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_346" + op: "Add" + input: "Mul_550" + input: "Mul_551" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_101" + op: "Sqrt" + input: "add_346" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_347/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_347" + op: "Add" + input: "Sqrt_101" + input: "add_347/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_102" + op: "RealDiv" + input: "add_345" + input: "add_347" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_552/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_552" + op: "Mul" + input: "mul_552/x" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_348" + op: "Add" + input: "truediv_102" + input: "mul_552" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_553" + op: "Mul" + input: "add_2" + input: "add_348" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_102" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/query/kernel/read" + input: "mul_553" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_509" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "sub_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_510" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "add_345" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_511" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "add_346" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_554/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_554" + op: "Mul" + input: "Mul_554/x" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_555/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_555" + op: "Mul" + input: "Mul_555/x" + input: "clip_by_global_norm/clip_by_global_norm/_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_349" + op: "Add" + input: "Mul_554" + input: "Mul_555" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_556/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_556" + op: "Mul" + input: "Mul_556/x" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_102" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_557/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_557" + op: "Mul" + input: "Mul_557/x" + input: "Square_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_350" + op: "Add" + input: "Mul_556" + input: "Mul_557" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_102" + op: "Sqrt" + input: "add_350" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_351/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_351" + op: "Add" + input: "Sqrt_102" + input: "add_351/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_103" + op: "RealDiv" + input: "add_349" + input: "add_351" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_558" + op: "Mul" + input: "add_2" + input: "truediv_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_103" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/query/bias/read" + input: "mul_558" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_512" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "sub_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_513" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "add_349" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_514" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "add_350" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_559/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_559" + op: "Mul" + input: "Mul_559/x" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_560/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_560" + op: "Mul" + input: "Mul_560/x" + input: "clip_by_global_norm/clip_by_global_norm/_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_352" + op: "Add" + input: "Mul_559" + input: "Mul_560" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_561/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_561" + op: "Mul" + input: "Mul_561/x" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_103" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_562/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_562" + op: "Mul" + input: "Mul_562/x" + input: "Square_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_353" + op: "Add" + input: "Mul_561" + input: "Mul_562" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_103" + op: "Sqrt" + input: "add_353" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_354/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_354" + op: "Add" + input: "Sqrt_103" + input: "add_354/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_104" + op: "RealDiv" + input: "add_352" + input: "add_354" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_563/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_563" + op: "Mul" + input: "mul_563/x" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_355" + op: "Add" + input: "truediv_104" + input: "mul_563" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_564" + op: "Mul" + input: "add_2" + input: "add_355" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_104" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/key/kernel/read" + input: "mul_564" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_515" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "sub_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_516" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "add_352" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_517" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "add_353" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_565/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_565" + op: "Mul" + input: "Mul_565/x" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_566/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_566" + op: "Mul" + input: "Mul_566/x" + input: "clip_by_global_norm/clip_by_global_norm/_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_356" + op: "Add" + input: "Mul_565" + input: "Mul_566" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_567/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_567" + op: "Mul" + input: "Mul_567/x" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_104" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_568/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_568" + op: "Mul" + input: "Mul_568/x" + input: "Square_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_357" + op: "Add" + input: "Mul_567" + input: "Mul_568" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_104" + op: "Sqrt" + input: "add_357" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_358/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_358" + op: "Add" + input: "Sqrt_104" + input: "add_358/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_105" + op: "RealDiv" + input: "add_356" + input: "add_358" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_569" + op: "Mul" + input: "add_2" + input: "truediv_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_105" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/key/bias/read" + input: "mul_569" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_518" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "sub_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_519" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "add_356" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_520" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "add_357" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_570/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_570" + op: "Mul" + input: "Mul_570/x" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_571/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_571" + op: "Mul" + input: "Mul_571/x" + input: "clip_by_global_norm/clip_by_global_norm/_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_359" + op: "Add" + input: "Mul_570" + input: "Mul_571" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_572/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_572" + op: "Mul" + input: "Mul_572/x" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_105" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_573/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_573" + op: "Mul" + input: "Mul_573/x" + input: "Square_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_360" + op: "Add" + input: "Mul_572" + input: "Mul_573" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_105" + op: "Sqrt" + input: "add_360" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_361/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_361" + op: "Add" + input: "Sqrt_105" + input: "add_361/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_106" + op: "RealDiv" + input: "add_359" + input: "add_361" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_574/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_574" + op: "Mul" + input: "mul_574/x" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_362" + op: "Add" + input: "truediv_106" + input: "mul_574" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_575" + op: "Mul" + input: "add_2" + input: "add_362" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_106" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/value/kernel/read" + input: "mul_575" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_521" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "sub_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_522" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "add_359" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_523" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "add_360" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_576/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_576" + op: "Mul" + input: "Mul_576/x" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_577/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_577" + op: "Mul" + input: "Mul_577/x" + input: "clip_by_global_norm/clip_by_global_norm/_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_363" + op: "Add" + input: "Mul_576" + input: "Mul_577" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_578/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_578" + op: "Mul" + input: "Mul_578/x" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_106" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_579/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_579" + op: "Mul" + input: "Mul_579/x" + input: "Square_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_364" + op: "Add" + input: "Mul_578" + input: "Mul_579" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_106" + op: "Sqrt" + input: "add_364" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_365/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_365" + op: "Add" + input: "Sqrt_106" + input: "add_365/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_107" + op: "RealDiv" + input: "add_363" + input: "add_365" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_580" + op: "Mul" + input: "add_2" + input: "truediv_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_107" + op: "Sub" + input: "bert/encoder/layer_6/attention/self/value/bias/read" + input: "mul_580" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_524" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "sub_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_525" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "add_363" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_526" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "add_364" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_581/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_581" + op: "Mul" + input: "Mul_581/x" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_582/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_582" + op: "Mul" + input: "Mul_582/x" + input: "clip_by_global_norm/clip_by_global_norm/_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_366" + op: "Add" + input: "Mul_581" + input: "Mul_582" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_583/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_583" + op: "Mul" + input: "Mul_583/x" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_107" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_584/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_584" + op: "Mul" + input: "Mul_584/x" + input: "Square_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_367" + op: "Add" + input: "Mul_583" + input: "Mul_584" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_107" + op: "Sqrt" + input: "add_367" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_368/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_368" + op: "Add" + input: "Sqrt_107" + input: "add_368/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_108" + op: "RealDiv" + input: "add_366" + input: "add_368" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_585/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_585" + op: "Mul" + input: "mul_585/x" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_369" + op: "Add" + input: "truediv_108" + input: "mul_585" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_586" + op: "Mul" + input: "add_2" + input: "add_369" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_108" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/dense/kernel/read" + input: "mul_586" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_527" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "sub_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_528" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: "add_366" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_529" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "add_367" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_587/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_587" + op: "Mul" + input: "Mul_587/x" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_588/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_588" + op: "Mul" + input: "Mul_588/x" + input: "clip_by_global_norm/clip_by_global_norm/_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_370" + op: "Add" + input: "Mul_587" + input: "Mul_588" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_589/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_589" + op: "Mul" + input: "Mul_589/x" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_108" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_590/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_590" + op: "Mul" + input: "Mul_590/x" + input: "Square_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_371" + op: "Add" + input: "Mul_589" + input: "Mul_590" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_108" + op: "Sqrt" + input: "add_371" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_372/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_372" + op: "Add" + input: "Sqrt_108" + input: "add_372/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_109" + op: "RealDiv" + input: "add_370" + input: "add_372" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_591" + op: "Mul" + input: "add_2" + input: "truediv_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_109" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/dense/bias/read" + input: "mul_591" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_530" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "sub_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_531" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "add_370" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_532" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "add_371" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_592/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_592" + op: "Mul" + input: "Mul_592/x" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_593/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_593" + op: "Mul" + input: "Mul_593/x" + input: "clip_by_global_norm/clip_by_global_norm/_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_373" + op: "Add" + input: "Mul_592" + input: "Mul_593" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_594/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_594" + op: "Mul" + input: "Mul_594/x" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_109" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_595/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_595" + op: "Mul" + input: "Mul_595/x" + input: "Square_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_374" + op: "Add" + input: "Mul_594" + input: "Mul_595" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_109" + op: "Sqrt" + input: "add_374" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_375/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_375" + op: "Add" + input: "Sqrt_109" + input: "add_375/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_110" + op: "RealDiv" + input: "add_373" + input: "add_375" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_596" + op: "Mul" + input: "add_2" + input: "truediv_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_110" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/read" + input: "mul_596" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_533" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "sub_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_534" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "add_373" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_535" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "add_374" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_597/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_597" + op: "Mul" + input: "Mul_597/x" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_598/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_598" + op: "Mul" + input: "Mul_598/x" + input: "clip_by_global_norm/clip_by_global_norm/_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_376" + op: "Add" + input: "Mul_597" + input: "Mul_598" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_599/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_599" + op: "Mul" + input: "Mul_599/x" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_110" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_600/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_600" + op: "Mul" + input: "Mul_600/x" + input: "Square_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_377" + op: "Add" + input: "Mul_599" + input: "Mul_600" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_110" + op: "Sqrt" + input: "add_377" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_378/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_378" + op: "Add" + input: "Sqrt_110" + input: "add_378/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_111" + op: "RealDiv" + input: "add_376" + input: "add_378" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_601" + op: "Mul" + input: "add_2" + input: "truediv_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_111" + op: "Sub" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/read" + input: "mul_601" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_536" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "sub_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_537" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "add_376" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_538" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "add_377" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_602/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_602" + op: "Mul" + input: "Mul_602/x" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_603/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_603" + op: "Mul" + input: "Mul_603/x" + input: "clip_by_global_norm/clip_by_global_norm/_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_379" + op: "Add" + input: "Mul_602" + input: "Mul_603" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_604/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_604" + op: "Mul" + input: "Mul_604/x" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_111" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_605/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_605" + op: "Mul" + input: "Mul_605/x" + input: "Square_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_380" + op: "Add" + input: "Mul_604" + input: "Mul_605" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_111" + op: "Sqrt" + input: "add_380" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_381/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_381" + op: "Add" + input: "Sqrt_111" + input: "add_381/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_112" + op: "RealDiv" + input: "add_379" + input: "add_381" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_606/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_606" + op: "Mul" + input: "mul_606/x" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_382" + op: "Add" + input: "truediv_112" + input: "mul_606" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_607" + op: "Mul" + input: "add_2" + input: "add_382" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_112" + op: "Sub" + input: "bert/encoder/layer_6/intermediate/dense/kernel/read" + input: "mul_607" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_539" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "sub_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_540" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "add_379" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_541" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "add_380" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_608/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_608" + op: "Mul" + input: "Mul_608/x" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_609/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_609" + op: "Mul" + input: "Mul_609/x" + input: "clip_by_global_norm/clip_by_global_norm/_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_383" + op: "Add" + input: "Mul_608" + input: "Mul_609" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_610/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_610" + op: "Mul" + input: "Mul_610/x" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_112" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_611/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_611" + op: "Mul" + input: "Mul_611/x" + input: "Square_112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_384" + op: "Add" + input: "Mul_610" + input: "Mul_611" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_112" + op: "Sqrt" + input: "add_384" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_385/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_385" + op: "Add" + input: "Sqrt_112" + input: "add_385/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_113" + op: "RealDiv" + input: "add_383" + input: "add_385" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_612" + op: "Mul" + input: "add_2" + input: "truediv_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_113" + op: "Sub" + input: "bert/encoder/layer_6/intermediate/dense/bias/read" + input: "mul_612" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_542" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "sub_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_543" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "add_383" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_544" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "add_384" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_613/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_613" + op: "Mul" + input: "Mul_613/x" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_614/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_614" + op: "Mul" + input: "Mul_614/x" + input: "clip_by_global_norm/clip_by_global_norm/_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_386" + op: "Add" + input: "Mul_613" + input: "Mul_614" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_615/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_615" + op: "Mul" + input: "Mul_615/x" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_113" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_616/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_616" + op: "Mul" + input: "Mul_616/x" + input: "Square_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_387" + op: "Add" + input: "Mul_615" + input: "Mul_616" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_113" + op: "Sqrt" + input: "add_387" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_388/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_388" + op: "Add" + input: "Sqrt_113" + input: "add_388/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_114" + op: "RealDiv" + input: "add_386" + input: "add_388" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_617/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_617" + op: "Mul" + input: "mul_617/x" + input: "bert/encoder/layer_6/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_389" + op: "Add" + input: "truediv_114" + input: "mul_617" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_618" + op: "Mul" + input: "add_2" + input: "add_389" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_114" + op: "Sub" + input: "bert/encoder/layer_6/output/dense/kernel/read" + input: "mul_618" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_545" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "sub_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_546" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "add_386" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_547" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "add_387" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_619/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_619" + op: "Mul" + input: "Mul_619/x" + input: "bert/encoder/layer_6/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_620/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_620" + op: "Mul" + input: "Mul_620/x" + input: "clip_by_global_norm/clip_by_global_norm/_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_390" + op: "Add" + input: "Mul_619" + input: "Mul_620" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_621/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_621" + op: "Mul" + input: "Mul_621/x" + input: "bert/encoder/layer_6/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_114" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_622/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_622" + op: "Mul" + input: "Mul_622/x" + input: "Square_114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_391" + op: "Add" + input: "Mul_621" + input: "Mul_622" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_114" + op: "Sqrt" + input: "add_391" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_392/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_392" + op: "Add" + input: "Sqrt_114" + input: "add_392/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_115" + op: "RealDiv" + input: "add_390" + input: "add_392" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_623" + op: "Mul" + input: "add_2" + input: "truediv_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_115" + op: "Sub" + input: "bert/encoder/layer_6/output/dense/bias/read" + input: "mul_623" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_548" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "sub_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_549" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "add_390" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_550" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "add_391" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_624/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_624" + op: "Mul" + input: "Mul_624/x" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_625/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_625" + op: "Mul" + input: "Mul_625/x" + input: "clip_by_global_norm/clip_by_global_norm/_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_393" + op: "Add" + input: "Mul_624" + input: "Mul_625" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_626/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_626" + op: "Mul" + input: "Mul_626/x" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_115" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_627/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_627" + op: "Mul" + input: "Mul_627/x" + input: "Square_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_394" + op: "Add" + input: "Mul_626" + input: "Mul_627" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_115" + op: "Sqrt" + input: "add_394" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_395/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_395" + op: "Add" + input: "Sqrt_115" + input: "add_395/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_116" + op: "RealDiv" + input: "add_393" + input: "add_395" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_628" + op: "Mul" + input: "add_2" + input: "truediv_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_116" + op: "Sub" + input: "bert/encoder/layer_6/output/LayerNorm/beta/read" + input: "mul_628" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_551" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "sub_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_552" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "add_393" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_553" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "add_394" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_629/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_629" + op: "Mul" + input: "Mul_629/x" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_630/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_630" + op: "Mul" + input: "Mul_630/x" + input: "clip_by_global_norm/clip_by_global_norm/_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_396" + op: "Add" + input: "Mul_629" + input: "Mul_630" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_631/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_631" + op: "Mul" + input: "Mul_631/x" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_116" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_632/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_632" + op: "Mul" + input: "Mul_632/x" + input: "Square_116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_397" + op: "Add" + input: "Mul_631" + input: "Mul_632" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_116" + op: "Sqrt" + input: "add_397" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_398/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_398" + op: "Add" + input: "Sqrt_116" + input: "add_398/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_117" + op: "RealDiv" + input: "add_396" + input: "add_398" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_633" + op: "Mul" + input: "add_2" + input: "truediv_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_117" + op: "Sub" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/read" + input: "mul_633" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_554" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "sub_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_555" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "add_396" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_556" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "add_397" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_634/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_634" + op: "Mul" + input: "Mul_634/x" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_635/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_635" + op: "Mul" + input: "Mul_635/x" + input: "clip_by_global_norm/clip_by_global_norm/_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_399" + op: "Add" + input: "Mul_634" + input: "Mul_635" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_636/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_636" + op: "Mul" + input: "Mul_636/x" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_117" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_637/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_637" + op: "Mul" + input: "Mul_637/x" + input: "Square_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_400" + op: "Add" + input: "Mul_636" + input: "Mul_637" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_117" + op: "Sqrt" + input: "add_400" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_401/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_401" + op: "Add" + input: "Sqrt_117" + input: "add_401/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_118" + op: "RealDiv" + input: "add_399" + input: "add_401" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_638/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_638" + op: "Mul" + input: "mul_638/x" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_402" + op: "Add" + input: "truediv_118" + input: "mul_638" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_639" + op: "Mul" + input: "add_2" + input: "add_402" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_118" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/query/kernel/read" + input: "mul_639" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_557" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "sub_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_558" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "add_399" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_559" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "add_400" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_640/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_640" + op: "Mul" + input: "Mul_640/x" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_641/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_641" + op: "Mul" + input: "Mul_641/x" + input: "clip_by_global_norm/clip_by_global_norm/_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_403" + op: "Add" + input: "Mul_640" + input: "Mul_641" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_642/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_642" + op: "Mul" + input: "Mul_642/x" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_118" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_643/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_643" + op: "Mul" + input: "Mul_643/x" + input: "Square_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_404" + op: "Add" + input: "Mul_642" + input: "Mul_643" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_118" + op: "Sqrt" + input: "add_404" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_405/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_405" + op: "Add" + input: "Sqrt_118" + input: "add_405/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_119" + op: "RealDiv" + input: "add_403" + input: "add_405" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_644" + op: "Mul" + input: "add_2" + input: "truediv_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_119" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/query/bias/read" + input: "mul_644" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_560" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "sub_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_561" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + input: "add_403" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_562" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "add_404" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_645/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_645" + op: "Mul" + input: "Mul_645/x" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_646/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_646" + op: "Mul" + input: "Mul_646/x" + input: "clip_by_global_norm/clip_by_global_norm/_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_406" + op: "Add" + input: "Mul_645" + input: "Mul_646" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_647/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_647" + op: "Mul" + input: "Mul_647/x" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_119" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_648/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_648" + op: "Mul" + input: "Mul_648/x" + input: "Square_119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_407" + op: "Add" + input: "Mul_647" + input: "Mul_648" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_119" + op: "Sqrt" + input: "add_407" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_408/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_408" + op: "Add" + input: "Sqrt_119" + input: "add_408/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_120" + op: "RealDiv" + input: "add_406" + input: "add_408" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_649/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_649" + op: "Mul" + input: "mul_649/x" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_409" + op: "Add" + input: "truediv_120" + input: "mul_649" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_650" + op: "Mul" + input: "add_2" + input: "add_409" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_120" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/key/kernel/read" + input: "mul_650" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_563" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "sub_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_564" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "add_406" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_565" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "add_407" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_651/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_651" + op: "Mul" + input: "Mul_651/x" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_652/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_652" + op: "Mul" + input: "Mul_652/x" + input: "clip_by_global_norm/clip_by_global_norm/_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_410" + op: "Add" + input: "Mul_651" + input: "Mul_652" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_653/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_653" + op: "Mul" + input: "Mul_653/x" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_120" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_654/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_654" + op: "Mul" + input: "Mul_654/x" + input: "Square_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_411" + op: "Add" + input: "Mul_653" + input: "Mul_654" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_120" + op: "Sqrt" + input: "add_411" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_412/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_412" + op: "Add" + input: "Sqrt_120" + input: "add_412/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_121" + op: "RealDiv" + input: "add_410" + input: "add_412" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_655" + op: "Mul" + input: "add_2" + input: "truediv_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_121" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/key/bias/read" + input: "mul_655" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_566" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "sub_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_567" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "add_410" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_568" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "add_411" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_656/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_656" + op: "Mul" + input: "Mul_656/x" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_657/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_657" + op: "Mul" + input: "Mul_657/x" + input: "clip_by_global_norm/clip_by_global_norm/_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_413" + op: "Add" + input: "Mul_656" + input: "Mul_657" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_658/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_658" + op: "Mul" + input: "Mul_658/x" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_121" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_659/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_659" + op: "Mul" + input: "Mul_659/x" + input: "Square_121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_414" + op: "Add" + input: "Mul_658" + input: "Mul_659" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_121" + op: "Sqrt" + input: "add_414" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_415/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_415" + op: "Add" + input: "Sqrt_121" + input: "add_415/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_122" + op: "RealDiv" + input: "add_413" + input: "add_415" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_660/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_660" + op: "Mul" + input: "mul_660/x" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_416" + op: "Add" + input: "truediv_122" + input: "mul_660" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_661" + op: "Mul" + input: "add_2" + input: "add_416" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_122" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/value/kernel/read" + input: "mul_661" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_569" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "sub_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_570" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "add_413" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_571" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "add_414" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_662/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_662" + op: "Mul" + input: "Mul_662/x" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_663/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_663" + op: "Mul" + input: "Mul_663/x" + input: "clip_by_global_norm/clip_by_global_norm/_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_417" + op: "Add" + input: "Mul_662" + input: "Mul_663" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_664/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_664" + op: "Mul" + input: "Mul_664/x" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_122" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_665/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_665" + op: "Mul" + input: "Mul_665/x" + input: "Square_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_418" + op: "Add" + input: "Mul_664" + input: "Mul_665" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_122" + op: "Sqrt" + input: "add_418" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_419/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_419" + op: "Add" + input: "Sqrt_122" + input: "add_419/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_123" + op: "RealDiv" + input: "add_417" + input: "add_419" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_666" + op: "Mul" + input: "add_2" + input: "truediv_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_123" + op: "Sub" + input: "bert/encoder/layer_7/attention/self/value/bias/read" + input: "mul_666" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_572" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "sub_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_573" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "add_417" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_574" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "add_418" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_667/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_667" + op: "Mul" + input: "Mul_667/x" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_668/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_668" + op: "Mul" + input: "Mul_668/x" + input: "clip_by_global_norm/clip_by_global_norm/_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_420" + op: "Add" + input: "Mul_667" + input: "Mul_668" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_669/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_669" + op: "Mul" + input: "Mul_669/x" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_123" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_670/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_670" + op: "Mul" + input: "Mul_670/x" + input: "Square_123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_421" + op: "Add" + input: "Mul_669" + input: "Mul_670" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_123" + op: "Sqrt" + input: "add_421" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_422/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_422" + op: "Add" + input: "Sqrt_123" + input: "add_422/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_124" + op: "RealDiv" + input: "add_420" + input: "add_422" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_671/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_671" + op: "Mul" + input: "mul_671/x" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_423" + op: "Add" + input: "truediv_124" + input: "mul_671" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_672" + op: "Mul" + input: "add_2" + input: "add_423" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_124" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/dense/kernel/read" + input: "mul_672" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_575" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "sub_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_576" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "add_420" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_577" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "add_421" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_673/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_673" + op: "Mul" + input: "Mul_673/x" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_674/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_674" + op: "Mul" + input: "Mul_674/x" + input: "clip_by_global_norm/clip_by_global_norm/_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_424" + op: "Add" + input: "Mul_673" + input: "Mul_674" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_675/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_675" + op: "Mul" + input: "Mul_675/x" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_124" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_676/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_676" + op: "Mul" + input: "Mul_676/x" + input: "Square_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_425" + op: "Add" + input: "Mul_675" + input: "Mul_676" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_124" + op: "Sqrt" + input: "add_425" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_426/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_426" + op: "Add" + input: "Sqrt_124" + input: "add_426/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_125" + op: "RealDiv" + input: "add_424" + input: "add_426" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_677" + op: "Mul" + input: "add_2" + input: "truediv_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_125" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/dense/bias/read" + input: "mul_677" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_578" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "sub_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_579" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "add_424" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_580" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "add_425" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_678/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_678" + op: "Mul" + input: "Mul_678/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_679/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_679" + op: "Mul" + input: "Mul_679/x" + input: "clip_by_global_norm/clip_by_global_norm/_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_427" + op: "Add" + input: "Mul_678" + input: "Mul_679" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_680/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_680" + op: "Mul" + input: "Mul_680/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_125" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_681/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_681" + op: "Mul" + input: "Mul_681/x" + input: "Square_125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_428" + op: "Add" + input: "Mul_680" + input: "Mul_681" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_125" + op: "Sqrt" + input: "add_428" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_429/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_429" + op: "Add" + input: "Sqrt_125" + input: "add_429/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_126" + op: "RealDiv" + input: "add_427" + input: "add_429" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_682" + op: "Mul" + input: "add_2" + input: "truediv_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_126" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/read" + input: "mul_682" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_581" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "sub_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_582" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "add_427" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_583" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "add_428" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_683/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_683" + op: "Mul" + input: "Mul_683/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_684/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_684" + op: "Mul" + input: "Mul_684/x" + input: "clip_by_global_norm/clip_by_global_norm/_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_430" + op: "Add" + input: "Mul_683" + input: "Mul_684" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_685/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_685" + op: "Mul" + input: "Mul_685/x" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_126" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_686/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_686" + op: "Mul" + input: "Mul_686/x" + input: "Square_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_431" + op: "Add" + input: "Mul_685" + input: "Mul_686" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_126" + op: "Sqrt" + input: "add_431" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_432/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_432" + op: "Add" + input: "Sqrt_126" + input: "add_432/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_127" + op: "RealDiv" + input: "add_430" + input: "add_432" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_687" + op: "Mul" + input: "add_2" + input: "truediv_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_127" + op: "Sub" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/read" + input: "mul_687" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_584" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "sub_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_585" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "add_430" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_586" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "add_431" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_688/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_688" + op: "Mul" + input: "Mul_688/x" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_689/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_689" + op: "Mul" + input: "Mul_689/x" + input: "clip_by_global_norm/clip_by_global_norm/_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_433" + op: "Add" + input: "Mul_688" + input: "Mul_689" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_690/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_690" + op: "Mul" + input: "Mul_690/x" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_127" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_691/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_691" + op: "Mul" + input: "Mul_691/x" + input: "Square_127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_434" + op: "Add" + input: "Mul_690" + input: "Mul_691" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_127" + op: "Sqrt" + input: "add_434" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_435/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_435" + op: "Add" + input: "Sqrt_127" + input: "add_435/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_128" + op: "RealDiv" + input: "add_433" + input: "add_435" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_692/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_692" + op: "Mul" + input: "mul_692/x" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_436" + op: "Add" + input: "truediv_128" + input: "mul_692" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_693" + op: "Mul" + input: "add_2" + input: "add_436" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_128" + op: "Sub" + input: "bert/encoder/layer_7/intermediate/dense/kernel/read" + input: "mul_693" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_587" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "sub_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_588" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "add_433" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_589" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "add_434" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_694/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_694" + op: "Mul" + input: "Mul_694/x" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_695/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_695" + op: "Mul" + input: "Mul_695/x" + input: "clip_by_global_norm/clip_by_global_norm/_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_437" + op: "Add" + input: "Mul_694" + input: "Mul_695" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_696/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_696" + op: "Mul" + input: "Mul_696/x" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_128" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_697/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_697" + op: "Mul" + input: "Mul_697/x" + input: "Square_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_438" + op: "Add" + input: "Mul_696" + input: "Mul_697" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_128" + op: "Sqrt" + input: "add_438" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_439/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_439" + op: "Add" + input: "Sqrt_128" + input: "add_439/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_129" + op: "RealDiv" + input: "add_437" + input: "add_439" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_698" + op: "Mul" + input: "add_2" + input: "truediv_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_129" + op: "Sub" + input: "bert/encoder/layer_7/intermediate/dense/bias/read" + input: "mul_698" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_590" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "sub_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_591" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "add_437" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_592" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "add_438" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_699/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_699" + op: "Mul" + input: "Mul_699/x" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_700/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_700" + op: "Mul" + input: "Mul_700/x" + input: "clip_by_global_norm/clip_by_global_norm/_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_440" + op: "Add" + input: "Mul_699" + input: "Mul_700" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_701/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_701" + op: "Mul" + input: "Mul_701/x" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_129" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_702/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_702" + op: "Mul" + input: "Mul_702/x" + input: "Square_129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_441" + op: "Add" + input: "Mul_701" + input: "Mul_702" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_129" + op: "Sqrt" + input: "add_441" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_442/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_442" + op: "Add" + input: "Sqrt_129" + input: "add_442/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_130" + op: "RealDiv" + input: "add_440" + input: "add_442" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_703/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_703" + op: "Mul" + input: "mul_703/x" + input: "bert/encoder/layer_7/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_443" + op: "Add" + input: "truediv_130" + input: "mul_703" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_704" + op: "Mul" + input: "add_2" + input: "add_443" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_130" + op: "Sub" + input: "bert/encoder/layer_7/output/dense/kernel/read" + input: "mul_704" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_593" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "sub_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_594" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "add_440" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_595" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "add_441" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_705/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_705" + op: "Mul" + input: "Mul_705/x" + input: "bert/encoder/layer_7/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_706/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_706" + op: "Mul" + input: "Mul_706/x" + input: "clip_by_global_norm/clip_by_global_norm/_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_444" + op: "Add" + input: "Mul_705" + input: "Mul_706" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_707/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_707" + op: "Mul" + input: "Mul_707/x" + input: "bert/encoder/layer_7/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_130" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_708/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_708" + op: "Mul" + input: "Mul_708/x" + input: "Square_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_445" + op: "Add" + input: "Mul_707" + input: "Mul_708" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_130" + op: "Sqrt" + input: "add_445" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_446/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_446" + op: "Add" + input: "Sqrt_130" + input: "add_446/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_131" + op: "RealDiv" + input: "add_444" + input: "add_446" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_709" + op: "Mul" + input: "add_2" + input: "truediv_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_131" + op: "Sub" + input: "bert/encoder/layer_7/output/dense/bias/read" + input: "mul_709" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_596" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "sub_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_597" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "add_444" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_598" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "add_445" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_710/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_710" + op: "Mul" + input: "Mul_710/x" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_711/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_711" + op: "Mul" + input: "Mul_711/x" + input: "clip_by_global_norm/clip_by_global_norm/_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_447" + op: "Add" + input: "Mul_710" + input: "Mul_711" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_712/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_712" + op: "Mul" + input: "Mul_712/x" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_131" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_713/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_713" + op: "Mul" + input: "Mul_713/x" + input: "Square_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_448" + op: "Add" + input: "Mul_712" + input: "Mul_713" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_131" + op: "Sqrt" + input: "add_448" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_449/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_449" + op: "Add" + input: "Sqrt_131" + input: "add_449/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_132" + op: "RealDiv" + input: "add_447" + input: "add_449" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_714" + op: "Mul" + input: "add_2" + input: "truediv_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_132" + op: "Sub" + input: "bert/encoder/layer_7/output/LayerNorm/beta/read" + input: "mul_714" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_599" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "sub_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_600" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + input: "add_447" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_601" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "add_448" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_715/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_715" + op: "Mul" + input: "Mul_715/x" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_716/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_716" + op: "Mul" + input: "Mul_716/x" + input: "clip_by_global_norm/clip_by_global_norm/_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_450" + op: "Add" + input: "Mul_715" + input: "Mul_716" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_717/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_717" + op: "Mul" + input: "Mul_717/x" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_132" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_718/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_718" + op: "Mul" + input: "Mul_718/x" + input: "Square_132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_451" + op: "Add" + input: "Mul_717" + input: "Mul_718" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_132" + op: "Sqrt" + input: "add_451" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_452/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_452" + op: "Add" + input: "Sqrt_132" + input: "add_452/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_133" + op: "RealDiv" + input: "add_450" + input: "add_452" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_719" + op: "Mul" + input: "add_2" + input: "truediv_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_133" + op: "Sub" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/read" + input: "mul_719" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_602" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "sub_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_603" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "add_450" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_604" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "add_451" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_720/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_720" + op: "Mul" + input: "Mul_720/x" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_721/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_721" + op: "Mul" + input: "Mul_721/x" + input: "clip_by_global_norm/clip_by_global_norm/_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_453" + op: "Add" + input: "Mul_720" + input: "Mul_721" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_722/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_722" + op: "Mul" + input: "Mul_722/x" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_133" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_723/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_723" + op: "Mul" + input: "Mul_723/x" + input: "Square_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_454" + op: "Add" + input: "Mul_722" + input: "Mul_723" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_133" + op: "Sqrt" + input: "add_454" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_455/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_455" + op: "Add" + input: "Sqrt_133" + input: "add_455/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_134" + op: "RealDiv" + input: "add_453" + input: "add_455" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_724/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_724" + op: "Mul" + input: "mul_724/x" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_456" + op: "Add" + input: "truediv_134" + input: "mul_724" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_725" + op: "Mul" + input: "add_2" + input: "add_456" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_134" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/query/kernel/read" + input: "mul_725" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_605" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "sub_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_606" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: "add_453" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_607" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: "add_454" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_726/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_726" + op: "Mul" + input: "Mul_726/x" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_727/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_727" + op: "Mul" + input: "Mul_727/x" + input: "clip_by_global_norm/clip_by_global_norm/_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_457" + op: "Add" + input: "Mul_726" + input: "Mul_727" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_728/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_728" + op: "Mul" + input: "Mul_728/x" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_134" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_729/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_729" + op: "Mul" + input: "Mul_729/x" + input: "Square_134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_458" + op: "Add" + input: "Mul_728" + input: "Mul_729" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_134" + op: "Sqrt" + input: "add_458" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_459/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_459" + op: "Add" + input: "Sqrt_134" + input: "add_459/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_135" + op: "RealDiv" + input: "add_457" + input: "add_459" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_730" + op: "Mul" + input: "add_2" + input: "truediv_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_135" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/query/bias/read" + input: "mul_730" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_608" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "sub_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_609" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "add_457" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_610" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "add_458" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_731/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_731" + op: "Mul" + input: "Mul_731/x" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_732/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_732" + op: "Mul" + input: "Mul_732/x" + input: "clip_by_global_norm/clip_by_global_norm/_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_460" + op: "Add" + input: "Mul_731" + input: "Mul_732" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_733/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_733" + op: "Mul" + input: "Mul_733/x" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_135" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_734/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_734" + op: "Mul" + input: "Mul_734/x" + input: "Square_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_461" + op: "Add" + input: "Mul_733" + input: "Mul_734" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_135" + op: "Sqrt" + input: "add_461" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_462/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_462" + op: "Add" + input: "Sqrt_135" + input: "add_462/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_136" + op: "RealDiv" + input: "add_460" + input: "add_462" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_735/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_735" + op: "Mul" + input: "mul_735/x" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_463" + op: "Add" + input: "truediv_136" + input: "mul_735" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_736" + op: "Mul" + input: "add_2" + input: "add_463" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_136" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/key/kernel/read" + input: "mul_736" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_611" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "sub_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_612" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "add_460" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_613" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "add_461" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_737/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_737" + op: "Mul" + input: "Mul_737/x" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_738/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_738" + op: "Mul" + input: "Mul_738/x" + input: "clip_by_global_norm/clip_by_global_norm/_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_464" + op: "Add" + input: "Mul_737" + input: "Mul_738" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_739/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_739" + op: "Mul" + input: "Mul_739/x" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_136" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_740/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_740" + op: "Mul" + input: "Mul_740/x" + input: "Square_136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_465" + op: "Add" + input: "Mul_739" + input: "Mul_740" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_136" + op: "Sqrt" + input: "add_465" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_466/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_466" + op: "Add" + input: "Sqrt_136" + input: "add_466/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_137" + op: "RealDiv" + input: "add_464" + input: "add_466" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_741" + op: "Mul" + input: "add_2" + input: "truediv_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_137" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/key/bias/read" + input: "mul_741" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_614" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "sub_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_615" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "add_464" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_616" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "add_465" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_742/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_742" + op: "Mul" + input: "Mul_742/x" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_743/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_743" + op: "Mul" + input: "Mul_743/x" + input: "clip_by_global_norm/clip_by_global_norm/_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_467" + op: "Add" + input: "Mul_742" + input: "Mul_743" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_744/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_744" + op: "Mul" + input: "Mul_744/x" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_137" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_745/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_745" + op: "Mul" + input: "Mul_745/x" + input: "Square_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_468" + op: "Add" + input: "Mul_744" + input: "Mul_745" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_137" + op: "Sqrt" + input: "add_468" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_469/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_469" + op: "Add" + input: "Sqrt_137" + input: "add_469/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_138" + op: "RealDiv" + input: "add_467" + input: "add_469" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_746/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_746" + op: "Mul" + input: "mul_746/x" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_470" + op: "Add" + input: "truediv_138" + input: "mul_746" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_747" + op: "Mul" + input: "add_2" + input: "add_470" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_138" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/value/kernel/read" + input: "mul_747" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_617" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "sub_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_618" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "add_467" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_619" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "add_468" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_748/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_748" + op: "Mul" + input: "Mul_748/x" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_749/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_749" + op: "Mul" + input: "Mul_749/x" + input: "clip_by_global_norm/clip_by_global_norm/_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_471" + op: "Add" + input: "Mul_748" + input: "Mul_749" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_750/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_750" + op: "Mul" + input: "Mul_750/x" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_138" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_751/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_751" + op: "Mul" + input: "Mul_751/x" + input: "Square_138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_472" + op: "Add" + input: "Mul_750" + input: "Mul_751" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_138" + op: "Sqrt" + input: "add_472" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_473/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_473" + op: "Add" + input: "Sqrt_138" + input: "add_473/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_139" + op: "RealDiv" + input: "add_471" + input: "add_473" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_752" + op: "Mul" + input: "add_2" + input: "truediv_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_139" + op: "Sub" + input: "bert/encoder/layer_8/attention/self/value/bias/read" + input: "mul_752" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_620" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "sub_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_621" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "add_471" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_622" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "add_472" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_753/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_753" + op: "Mul" + input: "Mul_753/x" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_754/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_754" + op: "Mul" + input: "Mul_754/x" + input: "clip_by_global_norm/clip_by_global_norm/_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_474" + op: "Add" + input: "Mul_753" + input: "Mul_754" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_755/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_755" + op: "Mul" + input: "Mul_755/x" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_139" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_756/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_756" + op: "Mul" + input: "Mul_756/x" + input: "Square_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_475" + op: "Add" + input: "Mul_755" + input: "Mul_756" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_139" + op: "Sqrt" + input: "add_475" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_476/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_476" + op: "Add" + input: "Sqrt_139" + input: "add_476/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_140" + op: "RealDiv" + input: "add_474" + input: "add_476" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_757/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_757" + op: "Mul" + input: "mul_757/x" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_477" + op: "Add" + input: "truediv_140" + input: "mul_757" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_758" + op: "Mul" + input: "add_2" + input: "add_477" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_140" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/dense/kernel/read" + input: "mul_758" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_623" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "sub_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_624" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "add_474" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_625" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "add_475" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_759/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_759" + op: "Mul" + input: "Mul_759/x" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_760/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_760" + op: "Mul" + input: "Mul_760/x" + input: "clip_by_global_norm/clip_by_global_norm/_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_478" + op: "Add" + input: "Mul_759" + input: "Mul_760" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_761/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_761" + op: "Mul" + input: "Mul_761/x" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_140" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_762/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_762" + op: "Mul" + input: "Mul_762/x" + input: "Square_140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_479" + op: "Add" + input: "Mul_761" + input: "Mul_762" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_140" + op: "Sqrt" + input: "add_479" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_480/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_480" + op: "Add" + input: "Sqrt_140" + input: "add_480/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_141" + op: "RealDiv" + input: "add_478" + input: "add_480" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_763" + op: "Mul" + input: "add_2" + input: "truediv_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_141" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/dense/bias/read" + input: "mul_763" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_626" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "sub_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_627" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "add_478" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_628" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "add_479" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_764/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_764" + op: "Mul" + input: "Mul_764/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_765/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_765" + op: "Mul" + input: "Mul_765/x" + input: "clip_by_global_norm/clip_by_global_norm/_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_481" + op: "Add" + input: "Mul_764" + input: "Mul_765" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_766/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_766" + op: "Mul" + input: "Mul_766/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_141" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_767/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_767" + op: "Mul" + input: "Mul_767/x" + input: "Square_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_482" + op: "Add" + input: "Mul_766" + input: "Mul_767" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_141" + op: "Sqrt" + input: "add_482" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_483/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_483" + op: "Add" + input: "Sqrt_141" + input: "add_483/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_142" + op: "RealDiv" + input: "add_481" + input: "add_483" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_768" + op: "Mul" + input: "add_2" + input: "truediv_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_142" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/read" + input: "mul_768" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_629" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "sub_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_630" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "add_481" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_631" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "add_482" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_769/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_769" + op: "Mul" + input: "Mul_769/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_770/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_770" + op: "Mul" + input: "Mul_770/x" + input: "clip_by_global_norm/clip_by_global_norm/_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_484" + op: "Add" + input: "Mul_769" + input: "Mul_770" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_771/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_771" + op: "Mul" + input: "Mul_771/x" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_142" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_772/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_772" + op: "Mul" + input: "Mul_772/x" + input: "Square_142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_485" + op: "Add" + input: "Mul_771" + input: "Mul_772" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_142" + op: "Sqrt" + input: "add_485" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_486/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_486" + op: "Add" + input: "Sqrt_142" + input: "add_486/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_143" + op: "RealDiv" + input: "add_484" + input: "add_486" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_773" + op: "Mul" + input: "add_2" + input: "truediv_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_143" + op: "Sub" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/read" + input: "mul_773" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_632" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "sub_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_633" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "add_484" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_634" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "add_485" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_774/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_774" + op: "Mul" + input: "Mul_774/x" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_775/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_775" + op: "Mul" + input: "Mul_775/x" + input: "clip_by_global_norm/clip_by_global_norm/_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_487" + op: "Add" + input: "Mul_774" + input: "Mul_775" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_776/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_776" + op: "Mul" + input: "Mul_776/x" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_143" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_777/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_777" + op: "Mul" + input: "Mul_777/x" + input: "Square_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_488" + op: "Add" + input: "Mul_776" + input: "Mul_777" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_143" + op: "Sqrt" + input: "add_488" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_489/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_489" + op: "Add" + input: "Sqrt_143" + input: "add_489/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_144" + op: "RealDiv" + input: "add_487" + input: "add_489" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_778/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_778" + op: "Mul" + input: "mul_778/x" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_490" + op: "Add" + input: "truediv_144" + input: "mul_778" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_779" + op: "Mul" + input: "add_2" + input: "add_490" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_144" + op: "Sub" + input: "bert/encoder/layer_8/intermediate/dense/kernel/read" + input: "mul_779" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_635" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "sub_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_636" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "add_487" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_637" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "add_488" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_780/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_780" + op: "Mul" + input: "Mul_780/x" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_781/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_781" + op: "Mul" + input: "Mul_781/x" + input: "clip_by_global_norm/clip_by_global_norm/_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_491" + op: "Add" + input: "Mul_780" + input: "Mul_781" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_782/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_782" + op: "Mul" + input: "Mul_782/x" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_144" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_783/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_783" + op: "Mul" + input: "Mul_783/x" + input: "Square_144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_492" + op: "Add" + input: "Mul_782" + input: "Mul_783" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_144" + op: "Sqrt" + input: "add_492" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_493/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_493" + op: "Add" + input: "Sqrt_144" + input: "add_493/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_145" + op: "RealDiv" + input: "add_491" + input: "add_493" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_784" + op: "Mul" + input: "add_2" + input: "truediv_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_145" + op: "Sub" + input: "bert/encoder/layer_8/intermediate/dense/bias/read" + input: "mul_784" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_638" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "sub_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_639" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "add_491" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_640" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "add_492" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_785/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_785" + op: "Mul" + input: "Mul_785/x" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_786/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_786" + op: "Mul" + input: "Mul_786/x" + input: "clip_by_global_norm/clip_by_global_norm/_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_494" + op: "Add" + input: "Mul_785" + input: "Mul_786" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_787/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_787" + op: "Mul" + input: "Mul_787/x" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_145" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_788/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_788" + op: "Mul" + input: "Mul_788/x" + input: "Square_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_495" + op: "Add" + input: "Mul_787" + input: "Mul_788" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_145" + op: "Sqrt" + input: "add_495" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_496/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_496" + op: "Add" + input: "Sqrt_145" + input: "add_496/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_146" + op: "RealDiv" + input: "add_494" + input: "add_496" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_789/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_789" + op: "Mul" + input: "mul_789/x" + input: "bert/encoder/layer_8/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_497" + op: "Add" + input: "truediv_146" + input: "mul_789" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_790" + op: "Mul" + input: "add_2" + input: "add_497" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_146" + op: "Sub" + input: "bert/encoder/layer_8/output/dense/kernel/read" + input: "mul_790" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_641" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "sub_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_642" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "add_494" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_643" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "add_495" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_791/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_791" + op: "Mul" + input: "Mul_791/x" + input: "bert/encoder/layer_8/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_792/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_792" + op: "Mul" + input: "Mul_792/x" + input: "clip_by_global_norm/clip_by_global_norm/_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_498" + op: "Add" + input: "Mul_791" + input: "Mul_792" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_793/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_793" + op: "Mul" + input: "Mul_793/x" + input: "bert/encoder/layer_8/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_146" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_794/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_794" + op: "Mul" + input: "Mul_794/x" + input: "Square_146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_499" + op: "Add" + input: "Mul_793" + input: "Mul_794" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_146" + op: "Sqrt" + input: "add_499" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_500/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_500" + op: "Add" + input: "Sqrt_146" + input: "add_500/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_147" + op: "RealDiv" + input: "add_498" + input: "add_500" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_795" + op: "Mul" + input: "add_2" + input: "truediv_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_147" + op: "Sub" + input: "bert/encoder/layer_8/output/dense/bias/read" + input: "mul_795" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_644" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias" + input: "sub_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_645" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "add_498" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_646" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "add_499" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_796/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_796" + op: "Mul" + input: "Mul_796/x" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_797/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_797" + op: "Mul" + input: "Mul_797/x" + input: "clip_by_global_norm/clip_by_global_norm/_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_501" + op: "Add" + input: "Mul_796" + input: "Mul_797" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_798/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_798" + op: "Mul" + input: "Mul_798/x" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_147" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_799/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_799" + op: "Mul" + input: "Mul_799/x" + input: "Square_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_502" + op: "Add" + input: "Mul_798" + input: "Mul_799" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_147" + op: "Sqrt" + input: "add_502" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_503/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_503" + op: "Add" + input: "Sqrt_147" + input: "add_503/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_148" + op: "RealDiv" + input: "add_501" + input: "add_503" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_800" + op: "Mul" + input: "add_2" + input: "truediv_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_148" + op: "Sub" + input: "bert/encoder/layer_8/output/LayerNorm/beta/read" + input: "mul_800" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_647" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "sub_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_648" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "add_501" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_649" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: "add_502" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_801/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_801" + op: "Mul" + input: "Mul_801/x" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_802/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_802" + op: "Mul" + input: "Mul_802/x" + input: "clip_by_global_norm/clip_by_global_norm/_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_504" + op: "Add" + input: "Mul_801" + input: "Mul_802" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_803/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_803" + op: "Mul" + input: "Mul_803/x" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_148" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_804/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_804" + op: "Mul" + input: "Mul_804/x" + input: "Square_148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_505" + op: "Add" + input: "Mul_803" + input: "Mul_804" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_148" + op: "Sqrt" + input: "add_505" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_506/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_506" + op: "Add" + input: "Sqrt_148" + input: "add_506/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_149" + op: "RealDiv" + input: "add_504" + input: "add_506" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_805" + op: "Mul" + input: "add_2" + input: "truediv_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_149" + op: "Sub" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/read" + input: "mul_805" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_650" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "sub_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_651" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "add_504" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_652" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "add_505" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_806/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_806" + op: "Mul" + input: "Mul_806/x" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_807/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_807" + op: "Mul" + input: "Mul_807/x" + input: "clip_by_global_norm/clip_by_global_norm/_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_507" + op: "Add" + input: "Mul_806" + input: "Mul_807" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_808/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_808" + op: "Mul" + input: "Mul_808/x" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_149" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_809/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_809" + op: "Mul" + input: "Mul_809/x" + input: "Square_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_508" + op: "Add" + input: "Mul_808" + input: "Mul_809" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_149" + op: "Sqrt" + input: "add_508" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_509/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_509" + op: "Add" + input: "Sqrt_149" + input: "add_509/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_150" + op: "RealDiv" + input: "add_507" + input: "add_509" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_810/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_810" + op: "Mul" + input: "mul_810/x" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_510" + op: "Add" + input: "truediv_150" + input: "mul_810" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_811" + op: "Mul" + input: "add_2" + input: "add_510" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_150" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/query/kernel/read" + input: "mul_811" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_653" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "sub_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_654" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "add_507" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_655" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "add_508" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_812/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_812" + op: "Mul" + input: "Mul_812/x" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_813/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_813" + op: "Mul" + input: "Mul_813/x" + input: "clip_by_global_norm/clip_by_global_norm/_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_511" + op: "Add" + input: "Mul_812" + input: "Mul_813" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_814/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_814" + op: "Mul" + input: "Mul_814/x" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_150" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_815/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_815" + op: "Mul" + input: "Mul_815/x" + input: "Square_150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_512" + op: "Add" + input: "Mul_814" + input: "Mul_815" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_150" + op: "Sqrt" + input: "add_512" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_513/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_513" + op: "Add" + input: "Sqrt_150" + input: "add_513/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_151" + op: "RealDiv" + input: "add_511" + input: "add_513" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_816" + op: "Mul" + input: "add_2" + input: "truediv_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_151" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/query/bias/read" + input: "mul_816" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_656" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "sub_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_657" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "add_511" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_658" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + input: "add_512" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_817/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_817" + op: "Mul" + input: "Mul_817/x" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_818/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_818" + op: "Mul" + input: "Mul_818/x" + input: "clip_by_global_norm/clip_by_global_norm/_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_514" + op: "Add" + input: "Mul_817" + input: "Mul_818" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_819/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_819" + op: "Mul" + input: "Mul_819/x" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_151" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_820/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_820" + op: "Mul" + input: "Mul_820/x" + input: "Square_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_515" + op: "Add" + input: "Mul_819" + input: "Mul_820" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_151" + op: "Sqrt" + input: "add_515" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_516/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_516" + op: "Add" + input: "Sqrt_151" + input: "add_516/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_152" + op: "RealDiv" + input: "add_514" + input: "add_516" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_821/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_821" + op: "Mul" + input: "mul_821/x" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_517" + op: "Add" + input: "truediv_152" + input: "mul_821" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_822" + op: "Mul" + input: "add_2" + input: "add_517" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_152" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/key/kernel/read" + input: "mul_822" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_659" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "sub_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_660" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "add_514" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_661" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "add_515" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_823/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_823" + op: "Mul" + input: "Mul_823/x" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_824/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_824" + op: "Mul" + input: "Mul_824/x" + input: "clip_by_global_norm/clip_by_global_norm/_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_518" + op: "Add" + input: "Mul_823" + input: "Mul_824" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_825/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_825" + op: "Mul" + input: "Mul_825/x" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_152" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_826/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_826" + op: "Mul" + input: "Mul_826/x" + input: "Square_152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_519" + op: "Add" + input: "Mul_825" + input: "Mul_826" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_152" + op: "Sqrt" + input: "add_519" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_520/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_520" + op: "Add" + input: "Sqrt_152" + input: "add_520/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_153" + op: "RealDiv" + input: "add_518" + input: "add_520" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_827" + op: "Mul" + input: "add_2" + input: "truediv_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_153" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/key/bias/read" + input: "mul_827" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_662" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "sub_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_663" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "add_518" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_664" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "add_519" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_828/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_828" + op: "Mul" + input: "Mul_828/x" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_829/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_829" + op: "Mul" + input: "Mul_829/x" + input: "clip_by_global_norm/clip_by_global_norm/_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_521" + op: "Add" + input: "Mul_828" + input: "Mul_829" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_830/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_830" + op: "Mul" + input: "Mul_830/x" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_153" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_831/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_831" + op: "Mul" + input: "Mul_831/x" + input: "Square_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_522" + op: "Add" + input: "Mul_830" + input: "Mul_831" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_153" + op: "Sqrt" + input: "add_522" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_523/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_523" + op: "Add" + input: "Sqrt_153" + input: "add_523/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_154" + op: "RealDiv" + input: "add_521" + input: "add_523" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_832/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_832" + op: "Mul" + input: "mul_832/x" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_524" + op: "Add" + input: "truediv_154" + input: "mul_832" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_833" + op: "Mul" + input: "add_2" + input: "add_524" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_154" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/value/kernel/read" + input: "mul_833" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_665" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "sub_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_666" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "add_521" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_667" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "add_522" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_834/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_834" + op: "Mul" + input: "Mul_834/x" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_835/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_835" + op: "Mul" + input: "Mul_835/x" + input: "clip_by_global_norm/clip_by_global_norm/_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_525" + op: "Add" + input: "Mul_834" + input: "Mul_835" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_836/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_836" + op: "Mul" + input: "Mul_836/x" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_154" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_837/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_837" + op: "Mul" + input: "Mul_837/x" + input: "Square_154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_526" + op: "Add" + input: "Mul_836" + input: "Mul_837" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_154" + op: "Sqrt" + input: "add_526" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_527/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_527" + op: "Add" + input: "Sqrt_154" + input: "add_527/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_155" + op: "RealDiv" + input: "add_525" + input: "add_527" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_838" + op: "Mul" + input: "add_2" + input: "truediv_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_155" + op: "Sub" + input: "bert/encoder/layer_9/attention/self/value/bias/read" + input: "mul_838" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_668" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "sub_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_669" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "add_525" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_670" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "add_526" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_839/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_839" + op: "Mul" + input: "Mul_839/x" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_840/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_840" + op: "Mul" + input: "Mul_840/x" + input: "clip_by_global_norm/clip_by_global_norm/_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_528" + op: "Add" + input: "Mul_839" + input: "Mul_840" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_841/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_841" + op: "Mul" + input: "Mul_841/x" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_155" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_842/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_842" + op: "Mul" + input: "Mul_842/x" + input: "Square_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_529" + op: "Add" + input: "Mul_841" + input: "Mul_842" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_155" + op: "Sqrt" + input: "add_529" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_530/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_530" + op: "Add" + input: "Sqrt_155" + input: "add_530/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_156" + op: "RealDiv" + input: "add_528" + input: "add_530" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_843/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_843" + op: "Mul" + input: "mul_843/x" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_531" + op: "Add" + input: "truediv_156" + input: "mul_843" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_844" + op: "Mul" + input: "add_2" + input: "add_531" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_156" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/dense/kernel/read" + input: "mul_844" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_671" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "sub_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_672" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "add_528" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_673" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "add_529" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_845/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_845" + op: "Mul" + input: "Mul_845/x" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_846/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_846" + op: "Mul" + input: "Mul_846/x" + input: "clip_by_global_norm/clip_by_global_norm/_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_532" + op: "Add" + input: "Mul_845" + input: "Mul_846" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_847/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_847" + op: "Mul" + input: "Mul_847/x" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_156" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_848/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_848" + op: "Mul" + input: "Mul_848/x" + input: "Square_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_533" + op: "Add" + input: "Mul_847" + input: "Mul_848" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_156" + op: "Sqrt" + input: "add_533" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_534/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_534" + op: "Add" + input: "Sqrt_156" + input: "add_534/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_157" + op: "RealDiv" + input: "add_532" + input: "add_534" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_849" + op: "Mul" + input: "add_2" + input: "truediv_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_157" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/dense/bias/read" + input: "mul_849" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_674" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "sub_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_675" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "add_532" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_676" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "add_533" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_850/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_850" + op: "Mul" + input: "Mul_850/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_851/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_851" + op: "Mul" + input: "Mul_851/x" + input: "clip_by_global_norm/clip_by_global_norm/_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_535" + op: "Add" + input: "Mul_850" + input: "Mul_851" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_852/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_852" + op: "Mul" + input: "Mul_852/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_157" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_853/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_853" + op: "Mul" + input: "Mul_853/x" + input: "Square_157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_536" + op: "Add" + input: "Mul_852" + input: "Mul_853" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_157" + op: "Sqrt" + input: "add_536" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_537/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_537" + op: "Add" + input: "Sqrt_157" + input: "add_537/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_158" + op: "RealDiv" + input: "add_535" + input: "add_537" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_854" + op: "Mul" + input: "add_2" + input: "truediv_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_158" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/read" + input: "mul_854" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_677" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "sub_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_678" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "add_535" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_679" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "add_536" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_855/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_855" + op: "Mul" + input: "Mul_855/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_856/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_856" + op: "Mul" + input: "Mul_856/x" + input: "clip_by_global_norm/clip_by_global_norm/_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_538" + op: "Add" + input: "Mul_855" + input: "Mul_856" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_857/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_857" + op: "Mul" + input: "Mul_857/x" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_158" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_858/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_858" + op: "Mul" + input: "Mul_858/x" + input: "Square_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_539" + op: "Add" + input: "Mul_857" + input: "Mul_858" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_158" + op: "Sqrt" + input: "add_539" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_540/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_540" + op: "Add" + input: "Sqrt_158" + input: "add_540/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_159" + op: "RealDiv" + input: "add_538" + input: "add_540" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_859" + op: "Mul" + input: "add_2" + input: "truediv_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_159" + op: "Sub" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/read" + input: "mul_859" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_680" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "sub_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_681" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "add_538" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_682" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "add_539" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_860/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_860" + op: "Mul" + input: "Mul_860/x" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_861/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_861" + op: "Mul" + input: "Mul_861/x" + input: "clip_by_global_norm/clip_by_global_norm/_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_541" + op: "Add" + input: "Mul_860" + input: "Mul_861" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_862/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_862" + op: "Mul" + input: "Mul_862/x" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_159" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_863/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_863" + op: "Mul" + input: "Mul_863/x" + input: "Square_159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_542" + op: "Add" + input: "Mul_862" + input: "Mul_863" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_159" + op: "Sqrt" + input: "add_542" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_543/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_543" + op: "Add" + input: "Sqrt_159" + input: "add_543/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_160" + op: "RealDiv" + input: "add_541" + input: "add_543" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_864/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_864" + op: "Mul" + input: "mul_864/x" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_544" + op: "Add" + input: "truediv_160" + input: "mul_864" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_865" + op: "Mul" + input: "add_2" + input: "add_544" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_160" + op: "Sub" + input: "bert/encoder/layer_9/intermediate/dense/kernel/read" + input: "mul_865" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_683" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "sub_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_684" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "add_541" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_685" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "add_542" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_866/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_866" + op: "Mul" + input: "Mul_866/x" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_867/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_867" + op: "Mul" + input: "Mul_867/x" + input: "clip_by_global_norm/clip_by_global_norm/_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_545" + op: "Add" + input: "Mul_866" + input: "Mul_867" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_868/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_868" + op: "Mul" + input: "Mul_868/x" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_160" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_869/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_869" + op: "Mul" + input: "Mul_869/x" + input: "Square_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_546" + op: "Add" + input: "Mul_868" + input: "Mul_869" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_160" + op: "Sqrt" + input: "add_546" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_547/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_547" + op: "Add" + input: "Sqrt_160" + input: "add_547/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_161" + op: "RealDiv" + input: "add_545" + input: "add_547" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_870" + op: "Mul" + input: "add_2" + input: "truediv_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_161" + op: "Sub" + input: "bert/encoder/layer_9/intermediate/dense/bias/read" + input: "mul_870" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_686" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "sub_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_687" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "add_545" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_688" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "add_546" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_871/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_871" + op: "Mul" + input: "Mul_871/x" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_872/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_872" + op: "Mul" + input: "Mul_872/x" + input: "clip_by_global_norm/clip_by_global_norm/_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_548" + op: "Add" + input: "Mul_871" + input: "Mul_872" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_873/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_873" + op: "Mul" + input: "Mul_873/x" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_161" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_874/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_874" + op: "Mul" + input: "Mul_874/x" + input: "Square_161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_549" + op: "Add" + input: "Mul_873" + input: "Mul_874" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_161" + op: "Sqrt" + input: "add_549" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_550/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_550" + op: "Add" + input: "Sqrt_161" + input: "add_550/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_162" + op: "RealDiv" + input: "add_548" + input: "add_550" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_875/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_875" + op: "Mul" + input: "mul_875/x" + input: "bert/encoder/layer_9/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_551" + op: "Add" + input: "truediv_162" + input: "mul_875" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_876" + op: "Mul" + input: "add_2" + input: "add_551" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_162" + op: "Sub" + input: "bert/encoder/layer_9/output/dense/kernel/read" + input: "mul_876" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_689" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "sub_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_690" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "add_548" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_691" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "add_549" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_877/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_877" + op: "Mul" + input: "Mul_877/x" + input: "bert/encoder/layer_9/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_878/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_878" + op: "Mul" + input: "Mul_878/x" + input: "clip_by_global_norm/clip_by_global_norm/_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_552" + op: "Add" + input: "Mul_877" + input: "Mul_878" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_879/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_879" + op: "Mul" + input: "Mul_879/x" + input: "bert/encoder/layer_9/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_162" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_880/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_880" + op: "Mul" + input: "Mul_880/x" + input: "Square_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_553" + op: "Add" + input: "Mul_879" + input: "Mul_880" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_162" + op: "Sqrt" + input: "add_553" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_554/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_554" + op: "Add" + input: "Sqrt_162" + input: "add_554/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_163" + op: "RealDiv" + input: "add_552" + input: "add_554" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_881" + op: "Mul" + input: "add_2" + input: "truediv_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_163" + op: "Sub" + input: "bert/encoder/layer_9/output/dense/bias/read" + input: "mul_881" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_692" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "sub_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_693" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "add_552" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_694" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "add_553" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_882/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_882" + op: "Mul" + input: "Mul_882/x" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_883/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_883" + op: "Mul" + input: "Mul_883/x" + input: "clip_by_global_norm/clip_by_global_norm/_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_555" + op: "Add" + input: "Mul_882" + input: "Mul_883" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_884/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_884" + op: "Mul" + input: "Mul_884/x" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_163" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_885/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_885" + op: "Mul" + input: "Mul_885/x" + input: "Square_163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_556" + op: "Add" + input: "Mul_884" + input: "Mul_885" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_163" + op: "Sqrt" + input: "add_556" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_557/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_557" + op: "Add" + input: "Sqrt_163" + input: "add_557/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_164" + op: "RealDiv" + input: "add_555" + input: "add_557" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_886" + op: "Mul" + input: "add_2" + input: "truediv_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_164" + op: "Sub" + input: "bert/encoder/layer_9/output/LayerNorm/beta/read" + input: "mul_886" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_695" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "sub_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_696" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "add_555" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_697" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "add_556" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_887/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_887" + op: "Mul" + input: "Mul_887/x" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_888/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_888" + op: "Mul" + input: "Mul_888/x" + input: "clip_by_global_norm/clip_by_global_norm/_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_558" + op: "Add" + input: "Mul_887" + input: "Mul_888" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_889/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_889" + op: "Mul" + input: "Mul_889/x" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_164" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_890/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_890" + op: "Mul" + input: "Mul_890/x" + input: "Square_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_559" + op: "Add" + input: "Mul_889" + input: "Mul_890" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_164" + op: "Sqrt" + input: "add_559" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_560/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_560" + op: "Add" + input: "Sqrt_164" + input: "add_560/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_165" + op: "RealDiv" + input: "add_558" + input: "add_560" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_891" + op: "Mul" + input: "add_2" + input: "truediv_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_165" + op: "Sub" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/read" + input: "mul_891" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_698" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "sub_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_699" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: "add_558" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_700" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "add_559" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_892/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_892" + op: "Mul" + input: "Mul_892/x" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_893/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_893" + op: "Mul" + input: "Mul_893/x" + input: "clip_by_global_norm/clip_by_global_norm/_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_561" + op: "Add" + input: "Mul_892" + input: "Mul_893" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_894/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_894" + op: "Mul" + input: "Mul_894/x" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_165" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_895/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_895" + op: "Mul" + input: "Mul_895/x" + input: "Square_165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_562" + op: "Add" + input: "Mul_894" + input: "Mul_895" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_165" + op: "Sqrt" + input: "add_562" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_563/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_563" + op: "Add" + input: "Sqrt_165" + input: "add_563/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_166" + op: "RealDiv" + input: "add_561" + input: "add_563" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_896/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_896" + op: "Mul" + input: "mul_896/x" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_564" + op: "Add" + input: "truediv_166" + input: "mul_896" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_897" + op: "Mul" + input: "add_2" + input: "add_564" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_166" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/query/kernel/read" + input: "mul_897" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_701" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "sub_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_702" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "add_561" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_703" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "add_562" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_898/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_898" + op: "Mul" + input: "Mul_898/x" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_899/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_899" + op: "Mul" + input: "Mul_899/x" + input: "clip_by_global_norm/clip_by_global_norm/_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_565" + op: "Add" + input: "Mul_898" + input: "Mul_899" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_900/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_900" + op: "Mul" + input: "Mul_900/x" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_166" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_901/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_901" + op: "Mul" + input: "Mul_901/x" + input: "Square_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_566" + op: "Add" + input: "Mul_900" + input: "Mul_901" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_166" + op: "Sqrt" + input: "add_566" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_567/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_567" + op: "Add" + input: "Sqrt_166" + input: "add_567/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_167" + op: "RealDiv" + input: "add_565" + input: "add_567" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_902" + op: "Mul" + input: "add_2" + input: "truediv_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_167" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/query/bias/read" + input: "mul_902" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_704" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "sub_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_705" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "add_565" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_706" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "add_566" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_903/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_903" + op: "Mul" + input: "Mul_903/x" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_904/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_904" + op: "Mul" + input: "Mul_904/x" + input: "clip_by_global_norm/clip_by_global_norm/_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_568" + op: "Add" + input: "Mul_903" + input: "Mul_904" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_905/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_905" + op: "Mul" + input: "Mul_905/x" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_167" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_906/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_906" + op: "Mul" + input: "Mul_906/x" + input: "Square_167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_569" + op: "Add" + input: "Mul_905" + input: "Mul_906" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_167" + op: "Sqrt" + input: "add_569" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_570/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_570" + op: "Add" + input: "Sqrt_167" + input: "add_570/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_168" + op: "RealDiv" + input: "add_568" + input: "add_570" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_907/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_907" + op: "Mul" + input: "mul_907/x" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_571" + op: "Add" + input: "truediv_168" + input: "mul_907" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_908" + op: "Mul" + input: "add_2" + input: "add_571" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_168" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/key/kernel/read" + input: "mul_908" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_707" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "sub_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_708" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: "add_568" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_709" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "add_569" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_909/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_909" + op: "Mul" + input: "Mul_909/x" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_910/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_910" + op: "Mul" + input: "Mul_910/x" + input: "clip_by_global_norm/clip_by_global_norm/_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_572" + op: "Add" + input: "Mul_909" + input: "Mul_910" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_911/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_911" + op: "Mul" + input: "Mul_911/x" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_168" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_912/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_912" + op: "Mul" + input: "Mul_912/x" + input: "Square_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_573" + op: "Add" + input: "Mul_911" + input: "Mul_912" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_168" + op: "Sqrt" + input: "add_573" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_574/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_574" + op: "Add" + input: "Sqrt_168" + input: "add_574/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_169" + op: "RealDiv" + input: "add_572" + input: "add_574" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_913" + op: "Mul" + input: "add_2" + input: "truediv_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_169" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/key/bias/read" + input: "mul_913" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_710" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "sub_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_711" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "add_572" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_712" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "add_573" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_914/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_914" + op: "Mul" + input: "Mul_914/x" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_915/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_915" + op: "Mul" + input: "Mul_915/x" + input: "clip_by_global_norm/clip_by_global_norm/_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_575" + op: "Add" + input: "Mul_914" + input: "Mul_915" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_916/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_916" + op: "Mul" + input: "Mul_916/x" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_169" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_917/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_917" + op: "Mul" + input: "Mul_917/x" + input: "Square_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_576" + op: "Add" + input: "Mul_916" + input: "Mul_917" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_169" + op: "Sqrt" + input: "add_576" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_577/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_577" + op: "Add" + input: "Sqrt_169" + input: "add_577/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_170" + op: "RealDiv" + input: "add_575" + input: "add_577" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_918/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_918" + op: "Mul" + input: "mul_918/x" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_578" + op: "Add" + input: "truediv_170" + input: "mul_918" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_919" + op: "Mul" + input: "add_2" + input: "add_578" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_170" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/value/kernel/read" + input: "mul_919" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_713" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "sub_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_714" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "add_575" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_715" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "add_576" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_920/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_920" + op: "Mul" + input: "Mul_920/x" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_921/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_921" + op: "Mul" + input: "Mul_921/x" + input: "clip_by_global_norm/clip_by_global_norm/_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_579" + op: "Add" + input: "Mul_920" + input: "Mul_921" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_922/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_922" + op: "Mul" + input: "Mul_922/x" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_170" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_923/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_923" + op: "Mul" + input: "Mul_923/x" + input: "Square_170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_580" + op: "Add" + input: "Mul_922" + input: "Mul_923" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_170" + op: "Sqrt" + input: "add_580" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_581/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_581" + op: "Add" + input: "Sqrt_170" + input: "add_581/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_171" + op: "RealDiv" + input: "add_579" + input: "add_581" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_924" + op: "Mul" + input: "add_2" + input: "truediv_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_171" + op: "Sub" + input: "bert/encoder/layer_10/attention/self/value/bias/read" + input: "mul_924" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_716" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "sub_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_717" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "add_579" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_718" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "add_580" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_925/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_925" + op: "Mul" + input: "Mul_925/x" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_926/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_926" + op: "Mul" + input: "Mul_926/x" + input: "clip_by_global_norm/clip_by_global_norm/_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_582" + op: "Add" + input: "Mul_925" + input: "Mul_926" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_927/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_927" + op: "Mul" + input: "Mul_927/x" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_171" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_928/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_928" + op: "Mul" + input: "Mul_928/x" + input: "Square_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_583" + op: "Add" + input: "Mul_927" + input: "Mul_928" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_171" + op: "Sqrt" + input: "add_583" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_584/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_584" + op: "Add" + input: "Sqrt_171" + input: "add_584/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_172" + op: "RealDiv" + input: "add_582" + input: "add_584" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_929/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_929" + op: "Mul" + input: "mul_929/x" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_585" + op: "Add" + input: "truediv_172" + input: "mul_929" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_930" + op: "Mul" + input: "add_2" + input: "add_585" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_172" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/dense/kernel/read" + input: "mul_930" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_719" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "sub_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_720" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "add_582" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_721" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "add_583" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_931/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_931" + op: "Mul" + input: "Mul_931/x" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_932/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_932" + op: "Mul" + input: "Mul_932/x" + input: "clip_by_global_norm/clip_by_global_norm/_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_586" + op: "Add" + input: "Mul_931" + input: "Mul_932" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_933/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_933" + op: "Mul" + input: "Mul_933/x" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_172" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_934/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_934" + op: "Mul" + input: "Mul_934/x" + input: "Square_172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_587" + op: "Add" + input: "Mul_933" + input: "Mul_934" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_172" + op: "Sqrt" + input: "add_587" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_588/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_588" + op: "Add" + input: "Sqrt_172" + input: "add_588/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_173" + op: "RealDiv" + input: "add_586" + input: "add_588" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_935" + op: "Mul" + input: "add_2" + input: "truediv_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_173" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/dense/bias/read" + input: "mul_935" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_722" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "sub_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_723" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "add_586" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_724" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "add_587" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_936/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_936" + op: "Mul" + input: "Mul_936/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_937/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_937" + op: "Mul" + input: "Mul_937/x" + input: "clip_by_global_norm/clip_by_global_norm/_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_589" + op: "Add" + input: "Mul_936" + input: "Mul_937" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_938/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_938" + op: "Mul" + input: "Mul_938/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_173" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_939/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_939" + op: "Mul" + input: "Mul_939/x" + input: "Square_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_590" + op: "Add" + input: "Mul_938" + input: "Mul_939" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_173" + op: "Sqrt" + input: "add_590" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_591/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_591" + op: "Add" + input: "Sqrt_173" + input: "add_591/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_174" + op: "RealDiv" + input: "add_589" + input: "add_591" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_940" + op: "Mul" + input: "add_2" + input: "truediv_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_174" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/read" + input: "mul_940" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_725" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "sub_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_726" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "add_589" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_727" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "add_590" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_941/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_941" + op: "Mul" + input: "Mul_941/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_942/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_942" + op: "Mul" + input: "Mul_942/x" + input: "clip_by_global_norm/clip_by_global_norm/_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_592" + op: "Add" + input: "Mul_941" + input: "Mul_942" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_943/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_943" + op: "Mul" + input: "Mul_943/x" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_174" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_944/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_944" + op: "Mul" + input: "Mul_944/x" + input: "Square_174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_593" + op: "Add" + input: "Mul_943" + input: "Mul_944" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_174" + op: "Sqrt" + input: "add_593" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_594/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_594" + op: "Add" + input: "Sqrt_174" + input: "add_594/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_175" + op: "RealDiv" + input: "add_592" + input: "add_594" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_945" + op: "Mul" + input: "add_2" + input: "truediv_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_175" + op: "Sub" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/read" + input: "mul_945" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_728" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "sub_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_729" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "add_592" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_730" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "add_593" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_946/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_946" + op: "Mul" + input: "Mul_946/x" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_947/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_947" + op: "Mul" + input: "Mul_947/x" + input: "clip_by_global_norm/clip_by_global_norm/_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_595" + op: "Add" + input: "Mul_946" + input: "Mul_947" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_948/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_948" + op: "Mul" + input: "Mul_948/x" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_175" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_949/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_949" + op: "Mul" + input: "Mul_949/x" + input: "Square_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_596" + op: "Add" + input: "Mul_948" + input: "Mul_949" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_175" + op: "Sqrt" + input: "add_596" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_597/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_597" + op: "Add" + input: "Sqrt_175" + input: "add_597/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_176" + op: "RealDiv" + input: "add_595" + input: "add_597" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_950/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_950" + op: "Mul" + input: "mul_950/x" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_598" + op: "Add" + input: "truediv_176" + input: "mul_950" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_951" + op: "Mul" + input: "add_2" + input: "add_598" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_176" + op: "Sub" + input: "bert/encoder/layer_10/intermediate/dense/kernel/read" + input: "mul_951" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_731" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "sub_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_732" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "add_595" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_733" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "add_596" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_952/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_952" + op: "Mul" + input: "Mul_952/x" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_953/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_953" + op: "Mul" + input: "Mul_953/x" + input: "clip_by_global_norm/clip_by_global_norm/_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_599" + op: "Add" + input: "Mul_952" + input: "Mul_953" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_954/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_954" + op: "Mul" + input: "Mul_954/x" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_176" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_955/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_955" + op: "Mul" + input: "Mul_955/x" + input: "Square_176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_600" + op: "Add" + input: "Mul_954" + input: "Mul_955" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_176" + op: "Sqrt" + input: "add_600" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_601/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_601" + op: "Add" + input: "Sqrt_176" + input: "add_601/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_177" + op: "RealDiv" + input: "add_599" + input: "add_601" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_956" + op: "Mul" + input: "add_2" + input: "truediv_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_177" + op: "Sub" + input: "bert/encoder/layer_10/intermediate/dense/bias/read" + input: "mul_956" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_734" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "sub_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_735" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "add_599" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_736" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "add_600" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_957/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_957" + op: "Mul" + input: "Mul_957/x" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_958/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_958" + op: "Mul" + input: "Mul_958/x" + input: "clip_by_global_norm/clip_by_global_norm/_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_602" + op: "Add" + input: "Mul_957" + input: "Mul_958" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_959/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_959" + op: "Mul" + input: "Mul_959/x" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_177" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_960/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_960" + op: "Mul" + input: "Mul_960/x" + input: "Square_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_603" + op: "Add" + input: "Mul_959" + input: "Mul_960" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_177" + op: "Sqrt" + input: "add_603" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_604/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_604" + op: "Add" + input: "Sqrt_177" + input: "add_604/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_178" + op: "RealDiv" + input: "add_602" + input: "add_604" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_961/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_961" + op: "Mul" + input: "mul_961/x" + input: "bert/encoder/layer_10/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_605" + op: "Add" + input: "truediv_178" + input: "mul_961" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_962" + op: "Mul" + input: "add_2" + input: "add_605" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_178" + op: "Sub" + input: "bert/encoder/layer_10/output/dense/kernel/read" + input: "mul_962" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_737" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "sub_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_738" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "add_602" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_739" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "add_603" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_963/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_963" + op: "Mul" + input: "Mul_963/x" + input: "bert/encoder/layer_10/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_964/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_964" + op: "Mul" + input: "Mul_964/x" + input: "clip_by_global_norm/clip_by_global_norm/_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_606" + op: "Add" + input: "Mul_963" + input: "Mul_964" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_965/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_965" + op: "Mul" + input: "Mul_965/x" + input: "bert/encoder/layer_10/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_178" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_966/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_966" + op: "Mul" + input: "Mul_966/x" + input: "Square_178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_607" + op: "Add" + input: "Mul_965" + input: "Mul_966" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_178" + op: "Sqrt" + input: "add_607" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_608/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_608" + op: "Add" + input: "Sqrt_178" + input: "add_608/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_179" + op: "RealDiv" + input: "add_606" + input: "add_608" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_967" + op: "Mul" + input: "add_2" + input: "truediv_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_179" + op: "Sub" + input: "bert/encoder/layer_10/output/dense/bias/read" + input: "mul_967" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_740" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "sub_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_741" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "add_606" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_742" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "add_607" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_968/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_968" + op: "Mul" + input: "Mul_968/x" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_969/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_969" + op: "Mul" + input: "Mul_969/x" + input: "clip_by_global_norm/clip_by_global_norm/_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_609" + op: "Add" + input: "Mul_968" + input: "Mul_969" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_970/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_970" + op: "Mul" + input: "Mul_970/x" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_179" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_971/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_971" + op: "Mul" + input: "Mul_971/x" + input: "Square_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_610" + op: "Add" + input: "Mul_970" + input: "Mul_971" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_179" + op: "Sqrt" + input: "add_610" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_611/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_611" + op: "Add" + input: "Sqrt_179" + input: "add_611/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_180" + op: "RealDiv" + input: "add_609" + input: "add_611" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_972" + op: "Mul" + input: "add_2" + input: "truediv_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_180" + op: "Sub" + input: "bert/encoder/layer_10/output/LayerNorm/beta/read" + input: "mul_972" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_743" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "sub_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_744" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "add_609" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_745" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "add_610" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_973/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_973" + op: "Mul" + input: "Mul_973/x" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_974/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_974" + op: "Mul" + input: "Mul_974/x" + input: "clip_by_global_norm/clip_by_global_norm/_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_612" + op: "Add" + input: "Mul_973" + input: "Mul_974" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_975/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_975" + op: "Mul" + input: "Mul_975/x" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_180" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_976/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_976" + op: "Mul" + input: "Mul_976/x" + input: "Square_180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_613" + op: "Add" + input: "Mul_975" + input: "Mul_976" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_180" + op: "Sqrt" + input: "add_613" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_614/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_614" + op: "Add" + input: "Sqrt_180" + input: "add_614/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_181" + op: "RealDiv" + input: "add_612" + input: "add_614" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_977" + op: "Mul" + input: "add_2" + input: "truediv_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_181" + op: "Sub" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/read" + input: "mul_977" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_746" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "sub_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_747" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "add_612" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_748" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "add_613" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_978/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_978" + op: "Mul" + input: "Mul_978/x" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_979/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_979" + op: "Mul" + input: "Mul_979/x" + input: "clip_by_global_norm/clip_by_global_norm/_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_615" + op: "Add" + input: "Mul_978" + input: "Mul_979" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_980/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_980" + op: "Mul" + input: "Mul_980/x" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_181" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_981/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_981" + op: "Mul" + input: "Mul_981/x" + input: "Square_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_616" + op: "Add" + input: "Mul_980" + input: "Mul_981" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_181" + op: "Sqrt" + input: "add_616" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_617/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_617" + op: "Add" + input: "Sqrt_181" + input: "add_617/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_182" + op: "RealDiv" + input: "add_615" + input: "add_617" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_982/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_982" + op: "Mul" + input: "mul_982/x" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_618" + op: "Add" + input: "truediv_182" + input: "mul_982" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_983" + op: "Mul" + input: "add_2" + input: "add_618" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_182" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/query/kernel/read" + input: "mul_983" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_749" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "sub_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_750" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "add_615" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_751" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "add_616" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_984/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_984" + op: "Mul" + input: "Mul_984/x" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_985/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_985" + op: "Mul" + input: "Mul_985/x" + input: "clip_by_global_norm/clip_by_global_norm/_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_619" + op: "Add" + input: "Mul_984" + input: "Mul_985" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_986/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_986" + op: "Mul" + input: "Mul_986/x" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_182" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_987/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_987" + op: "Mul" + input: "Mul_987/x" + input: "Square_182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_620" + op: "Add" + input: "Mul_986" + input: "Mul_987" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_182" + op: "Sqrt" + input: "add_620" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_621/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_621" + op: "Add" + input: "Sqrt_182" + input: "add_621/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_183" + op: "RealDiv" + input: "add_619" + input: "add_621" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_988" + op: "Mul" + input: "add_2" + input: "truediv_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_183" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/query/bias/read" + input: "mul_988" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_752" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "sub_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_753" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "add_619" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_754" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "add_620" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_989/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_989" + op: "Mul" + input: "Mul_989/x" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_990/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_990" + op: "Mul" + input: "Mul_990/x" + input: "clip_by_global_norm/clip_by_global_norm/_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_622" + op: "Add" + input: "Mul_989" + input: "Mul_990" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_991/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_991" + op: "Mul" + input: "Mul_991/x" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_183" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_992/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_992" + op: "Mul" + input: "Mul_992/x" + input: "Square_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_623" + op: "Add" + input: "Mul_991" + input: "Mul_992" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_183" + op: "Sqrt" + input: "add_623" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_624/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_624" + op: "Add" + input: "Sqrt_183" + input: "add_624/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_184" + op: "RealDiv" + input: "add_622" + input: "add_624" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_993/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_993" + op: "Mul" + input: "mul_993/x" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_625" + op: "Add" + input: "truediv_184" + input: "mul_993" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_994" + op: "Mul" + input: "add_2" + input: "add_625" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_184" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/key/kernel/read" + input: "mul_994" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_755" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "sub_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_756" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "add_622" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_757" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "add_623" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_995/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_995" + op: "Mul" + input: "Mul_995/x" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_996/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_996" + op: "Mul" + input: "Mul_996/x" + input: "clip_by_global_norm/clip_by_global_norm/_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_626" + op: "Add" + input: "Mul_995" + input: "Mul_996" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_997/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_997" + op: "Mul" + input: "Mul_997/x" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_184" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_998/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_998" + op: "Mul" + input: "Mul_998/x" + input: "Square_184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_627" + op: "Add" + input: "Mul_997" + input: "Mul_998" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_184" + op: "Sqrt" + input: "add_627" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_628/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_628" + op: "Add" + input: "Sqrt_184" + input: "add_628/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_185" + op: "RealDiv" + input: "add_626" + input: "add_628" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_999" + op: "Mul" + input: "add_2" + input: "truediv_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_185" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/key/bias/read" + input: "mul_999" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_758" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "sub_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_759" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "add_626" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_760" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "add_627" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1000/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1000" + op: "Mul" + input: "Mul_1000/x" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1001/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1001" + op: "Mul" + input: "Mul_1001/x" + input: "clip_by_global_norm/clip_by_global_norm/_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_629" + op: "Add" + input: "Mul_1000" + input: "Mul_1001" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1002/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1002" + op: "Mul" + input: "Mul_1002/x" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_185" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1003/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1003" + op: "Mul" + input: "Mul_1003/x" + input: "Square_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_630" + op: "Add" + input: "Mul_1002" + input: "Mul_1003" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_185" + op: "Sqrt" + input: "add_630" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_631/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_631" + op: "Add" + input: "Sqrt_185" + input: "add_631/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_186" + op: "RealDiv" + input: "add_629" + input: "add_631" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1004/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1004" + op: "Mul" + input: "mul_1004/x" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_632" + op: "Add" + input: "truediv_186" + input: "mul_1004" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1005" + op: "Mul" + input: "add_2" + input: "add_632" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_186" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/value/kernel/read" + input: "mul_1005" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_761" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "sub_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_762" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "add_629" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_763" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "add_630" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1006/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1006" + op: "Mul" + input: "Mul_1006/x" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1007/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1007" + op: "Mul" + input: "Mul_1007/x" + input: "clip_by_global_norm/clip_by_global_norm/_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_633" + op: "Add" + input: "Mul_1006" + input: "Mul_1007" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1008/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1008" + op: "Mul" + input: "Mul_1008/x" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_186" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1009/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1009" + op: "Mul" + input: "Mul_1009/x" + input: "Square_186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_634" + op: "Add" + input: "Mul_1008" + input: "Mul_1009" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_186" + op: "Sqrt" + input: "add_634" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_635/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_635" + op: "Add" + input: "Sqrt_186" + input: "add_635/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_187" + op: "RealDiv" + input: "add_633" + input: "add_635" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1010" + op: "Mul" + input: "add_2" + input: "truediv_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_187" + op: "Sub" + input: "bert/encoder/layer_11/attention/self/value/bias/read" + input: "mul_1010" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_764" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "sub_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_765" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "add_633" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_766" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "add_634" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1011/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1011" + op: "Mul" + input: "Mul_1011/x" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1012/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1012" + op: "Mul" + input: "Mul_1012/x" + input: "clip_by_global_norm/clip_by_global_norm/_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_636" + op: "Add" + input: "Mul_1011" + input: "Mul_1012" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1013/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1013" + op: "Mul" + input: "Mul_1013/x" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_187" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1014/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1014" + op: "Mul" + input: "Mul_1014/x" + input: "Square_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_637" + op: "Add" + input: "Mul_1013" + input: "Mul_1014" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_187" + op: "Sqrt" + input: "add_637" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_638/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_638" + op: "Add" + input: "Sqrt_187" + input: "add_638/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_188" + op: "RealDiv" + input: "add_636" + input: "add_638" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1015/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1015" + op: "Mul" + input: "mul_1015/x" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_639" + op: "Add" + input: "truediv_188" + input: "mul_1015" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1016" + op: "Mul" + input: "add_2" + input: "add_639" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_188" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/dense/kernel/read" + input: "mul_1016" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_767" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "sub_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_768" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "add_636" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_769" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "add_637" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1017/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1017" + op: "Mul" + input: "Mul_1017/x" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1018/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1018" + op: "Mul" + input: "Mul_1018/x" + input: "clip_by_global_norm/clip_by_global_norm/_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_640" + op: "Add" + input: "Mul_1017" + input: "Mul_1018" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1019/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1019" + op: "Mul" + input: "Mul_1019/x" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_188" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1020/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1020" + op: "Mul" + input: "Mul_1020/x" + input: "Square_188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_641" + op: "Add" + input: "Mul_1019" + input: "Mul_1020" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_188" + op: "Sqrt" + input: "add_641" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_642/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_642" + op: "Add" + input: "Sqrt_188" + input: "add_642/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_189" + op: "RealDiv" + input: "add_640" + input: "add_642" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1021" + op: "Mul" + input: "add_2" + input: "truediv_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_189" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/dense/bias/read" + input: "mul_1021" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_770" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "sub_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_771" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "add_640" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_772" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "add_641" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1022/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1022" + op: "Mul" + input: "Mul_1022/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1023/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1023" + op: "Mul" + input: "Mul_1023/x" + input: "clip_by_global_norm/clip_by_global_norm/_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_643" + op: "Add" + input: "Mul_1022" + input: "Mul_1023" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1024/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1024" + op: "Mul" + input: "Mul_1024/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_189" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1025/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1025" + op: "Mul" + input: "Mul_1025/x" + input: "Square_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_644" + op: "Add" + input: "Mul_1024" + input: "Mul_1025" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_189" + op: "Sqrt" + input: "add_644" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_645/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_645" + op: "Add" + input: "Sqrt_189" + input: "add_645/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_190" + op: "RealDiv" + input: "add_643" + input: "add_645" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1026" + op: "Mul" + input: "add_2" + input: "truediv_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_190" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/read" + input: "mul_1026" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_773" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "sub_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_774" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "add_643" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_775" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "add_644" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1027/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1027" + op: "Mul" + input: "Mul_1027/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1028/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1028" + op: "Mul" + input: "Mul_1028/x" + input: "clip_by_global_norm/clip_by_global_norm/_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_646" + op: "Add" + input: "Mul_1027" + input: "Mul_1028" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1029/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1029" + op: "Mul" + input: "Mul_1029/x" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_190" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1030/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1030" + op: "Mul" + input: "Mul_1030/x" + input: "Square_190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_647" + op: "Add" + input: "Mul_1029" + input: "Mul_1030" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_190" + op: "Sqrt" + input: "add_647" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_648/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_648" + op: "Add" + input: "Sqrt_190" + input: "add_648/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_191" + op: "RealDiv" + input: "add_646" + input: "add_648" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1031" + op: "Mul" + input: "add_2" + input: "truediv_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_191" + op: "Sub" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/read" + input: "mul_1031" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_776" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "sub_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_777" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "add_646" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_778" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "add_647" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1032/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1032" + op: "Mul" + input: "Mul_1032/x" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1033/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1033" + op: "Mul" + input: "Mul_1033/x" + input: "clip_by_global_norm/clip_by_global_norm/_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_649" + op: "Add" + input: "Mul_1032" + input: "Mul_1033" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1034/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1034" + op: "Mul" + input: "Mul_1034/x" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_191" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1035/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1035" + op: "Mul" + input: "Mul_1035/x" + input: "Square_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_650" + op: "Add" + input: "Mul_1034" + input: "Mul_1035" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_191" + op: "Sqrt" + input: "add_650" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_651/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_651" + op: "Add" + input: "Sqrt_191" + input: "add_651/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_192" + op: "RealDiv" + input: "add_649" + input: "add_651" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_1036/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1036" + op: "Mul" + input: "mul_1036/x" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_652" + op: "Add" + input: "truediv_192" + input: "mul_1036" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_1037" + op: "Mul" + input: "add_2" + input: "add_652" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_192" + op: "Sub" + input: "bert/encoder/layer_11/intermediate/dense/kernel/read" + input: "mul_1037" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_779" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "sub_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_780" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "add_649" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_781" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "add_650" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1038/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1038" + op: "Mul" + input: "Mul_1038/x" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1039/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1039" + op: "Mul" + input: "Mul_1039/x" + input: "clip_by_global_norm/clip_by_global_norm/_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_653" + op: "Add" + input: "Mul_1038" + input: "Mul_1039" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1040/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1040" + op: "Mul" + input: "Mul_1040/x" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_192" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_1041/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1041" + op: "Mul" + input: "Mul_1041/x" + input: "Square_192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_654" + op: "Add" + input: "Mul_1040" + input: "Mul_1041" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_192" + op: "Sqrt" + input: "add_654" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_655/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_655" + op: "Add" + input: "Sqrt_192" + input: "add_655/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_193" + op: "RealDiv" + input: "add_653" + input: "add_655" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_1042" + op: "Mul" + input: "add_2" + input: "truediv_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_193" + op: "Sub" + input: "bert/encoder/layer_11/intermediate/dense/bias/read" + input: "mul_1042" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_782" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "sub_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_783" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "add_653" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_784" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "add_654" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1043/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1043" + op: "Mul" + input: "Mul_1043/x" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1044/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1044" + op: "Mul" + input: "Mul_1044/x" + input: "clip_by_global_norm/clip_by_global_norm/_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_656" + op: "Add" + input: "Mul_1043" + input: "Mul_1044" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1045/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1045" + op: "Mul" + input: "Mul_1045/x" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_193" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1046/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1046" + op: "Mul" + input: "Mul_1046/x" + input: "Square_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_657" + op: "Add" + input: "Mul_1045" + input: "Mul_1046" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_193" + op: "Sqrt" + input: "add_657" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_658/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_658" + op: "Add" + input: "Sqrt_193" + input: "add_658/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_194" + op: "RealDiv" + input: "add_656" + input: "add_658" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1047/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1047" + op: "Mul" + input: "mul_1047/x" + input: "bert/encoder/layer_11/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_659" + op: "Add" + input: "truediv_194" + input: "mul_1047" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1048" + op: "Mul" + input: "add_2" + input: "add_659" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_194" + op: "Sub" + input: "bert/encoder/layer_11/output/dense/kernel/read" + input: "mul_1048" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_785" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "sub_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_786" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "add_656" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_787" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "add_657" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1049/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1049" + op: "Mul" + input: "Mul_1049/x" + input: "bert/encoder/layer_11/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1050/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1050" + op: "Mul" + input: "Mul_1050/x" + input: "clip_by_global_norm/clip_by_global_norm/_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_660" + op: "Add" + input: "Mul_1049" + input: "Mul_1050" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1051/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1051" + op: "Mul" + input: "Mul_1051/x" + input: "bert/encoder/layer_11/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_194" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1052/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1052" + op: "Mul" + input: "Mul_1052/x" + input: "Square_194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_661" + op: "Add" + input: "Mul_1051" + input: "Mul_1052" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_194" + op: "Sqrt" + input: "add_661" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_662/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_662" + op: "Add" + input: "Sqrt_194" + input: "add_662/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_195" + op: "RealDiv" + input: "add_660" + input: "add_662" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1053" + op: "Mul" + input: "add_2" + input: "truediv_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_195" + op: "Sub" + input: "bert/encoder/layer_11/output/dense/bias/read" + input: "mul_1053" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_788" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "sub_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_789" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "add_660" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_790" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "add_661" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1054/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1054" + op: "Mul" + input: "Mul_1054/x" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1055/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1055" + op: "Mul" + input: "Mul_1055/x" + input: "clip_by_global_norm/clip_by_global_norm/_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_663" + op: "Add" + input: "Mul_1054" + input: "Mul_1055" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1056/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1056" + op: "Mul" + input: "Mul_1056/x" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_195" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1057/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1057" + op: "Mul" + input: "Mul_1057/x" + input: "Square_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_664" + op: "Add" + input: "Mul_1056" + input: "Mul_1057" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_195" + op: "Sqrt" + input: "add_664" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_665/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_665" + op: "Add" + input: "Sqrt_195" + input: "add_665/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_196" + op: "RealDiv" + input: "add_663" + input: "add_665" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1058" + op: "Mul" + input: "add_2" + input: "truediv_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_196" + op: "Sub" + input: "bert/encoder/layer_11/output/LayerNorm/beta/read" + input: "mul_1058" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_791" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "sub_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_792" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "add_663" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_793" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "add_664" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1059/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1059" + op: "Mul" + input: "Mul_1059/x" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1060/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1060" + op: "Mul" + input: "Mul_1060/x" + input: "clip_by_global_norm/clip_by_global_norm/_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_666" + op: "Add" + input: "Mul_1059" + input: "Mul_1060" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1061/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1061" + op: "Mul" + input: "Mul_1061/x" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_196" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1062/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1062" + op: "Mul" + input: "Mul_1062/x" + input: "Square_196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_667" + op: "Add" + input: "Mul_1061" + input: "Mul_1062" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_196" + op: "Sqrt" + input: "add_667" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_668/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_668" + op: "Add" + input: "Sqrt_196" + input: "add_668/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_197" + op: "RealDiv" + input: "add_666" + input: "add_668" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1063" + op: "Mul" + input: "add_2" + input: "truediv_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_197" + op: "Sub" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/read" + input: "mul_1063" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_794" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "sub_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_795" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "add_666" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_796" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "add_667" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1064/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1064" + op: "Mul" + input: "Mul_1064/x" + input: "bert/pooler/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1065/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1065" + op: "Mul" + input: "Mul_1065/x" + input: "clip_by_global_norm/clip_by_global_norm/_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_669" + op: "Add" + input: "Mul_1064" + input: "Mul_1065" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1066/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1066" + op: "Mul" + input: "Mul_1066/x" + input: "bert/pooler/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_197" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1067/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1067" + op: "Mul" + input: "Mul_1067/x" + input: "Square_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_670" + op: "Add" + input: "Mul_1066" + input: "Mul_1067" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_197" + op: "Sqrt" + input: "add_670" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_671/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_671" + op: "Add" + input: "Sqrt_197" + input: "add_671/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_198" + op: "RealDiv" + input: "add_669" + input: "add_671" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1068/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1068" + op: "Mul" + input: "mul_1068/x" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_672" + op: "Add" + input: "truediv_198" + input: "mul_1068" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1069" + op: "Mul" + input: "add_2" + input: "add_672" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_198" + op: "Sub" + input: "bert/pooler/dense/kernel/read" + input: "mul_1069" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_797" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "sub_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_798" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "add_669" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_799" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "add_670" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "bert/pooler/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/read" + op: "Identity" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "bert/pooler/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/read" + op: "Identity" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1070/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1070" + op: "Mul" + input: "Mul_1070/x" + input: "bert/pooler/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1071/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1071" + op: "Mul" + input: "Mul_1071/x" + input: "clip_by_global_norm/clip_by_global_norm/_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_673" + op: "Add" + input: "Mul_1070" + input: "Mul_1071" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1072/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1072" + op: "Mul" + input: "Mul_1072/x" + input: "bert/pooler/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_198" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1073/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1073" + op: "Mul" + input: "Mul_1073/x" + input: "Square_198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_674" + op: "Add" + input: "Mul_1072" + input: "Mul_1073" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_198" + op: "Sqrt" + input: "add_674" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_675/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_675" + op: "Add" + input: "Sqrt_198" + input: "add_675/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_199" + op: "RealDiv" + input: "add_673" + input: "add_675" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1074" + op: "Mul" + input: "add_2" + input: "truediv_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_199" + op: "Sub" + input: "bert/pooler/dense/bias/read" + input: "mul_1074" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_800" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "sub_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_801" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "add_673" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_802" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "add_674" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1075/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1075" + op: "Mul" + input: "Mul_1075/x" + input: "cls/predictions/transform/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1076/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1076" + op: "Mul" + input: "Mul_1076/x" + input: "clip_by_global_norm/clip_by_global_norm/_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_676" + op: "Add" + input: "Mul_1075" + input: "Mul_1076" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1077/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1077" + op: "Mul" + input: "Mul_1077/x" + input: "cls/predictions/transform/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_199" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1078/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1078" + op: "Mul" + input: "Mul_1078/x" + input: "Square_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_677" + op: "Add" + input: "Mul_1077" + input: "Mul_1078" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_199" + op: "Sqrt" + input: "add_677" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_678/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_678" + op: "Add" + input: "Sqrt_199" + input: "add_678/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_200" + op: "RealDiv" + input: "add_676" + input: "add_678" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1079/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1079" + op: "Mul" + input: "mul_1079/x" + input: "cls/predictions/transform/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_679" + op: "Add" + input: "truediv_200" + input: "mul_1079" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1080" + op: "Mul" + input: "add_2" + input: "add_679" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_200" + op: "Sub" + input: "cls/predictions/transform/dense/kernel/read" + input: "mul_1080" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_803" + op: "Assign" + input: "cls/predictions/transform/dense/kernel" + input: "sub_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_804" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "add_676" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_805" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "add_677" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "cls/predictions/transform/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "cls/predictions/transform/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1081/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1081" + op: "Mul" + input: "Mul_1081/x" + input: "cls/predictions/transform/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1082/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1082" + op: "Mul" + input: "Mul_1082/x" + input: "clip_by_global_norm/clip_by_global_norm/_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_680" + op: "Add" + input: "Mul_1081" + input: "Mul_1082" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1083/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1083" + op: "Mul" + input: "Mul_1083/x" + input: "cls/predictions/transform/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_200" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1084/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1084" + op: "Mul" + input: "Mul_1084/x" + input: "Square_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_681" + op: "Add" + input: "Mul_1083" + input: "Mul_1084" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_200" + op: "Sqrt" + input: "add_681" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_682/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_682" + op: "Add" + input: "Sqrt_200" + input: "add_682/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_201" + op: "RealDiv" + input: "add_680" + input: "add_682" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1085" + op: "Mul" + input: "add_2" + input: "truediv_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_201" + op: "Sub" + input: "cls/predictions/transform/dense/bias/read" + input: "mul_1085" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_806" + op: "Assign" + input: "cls/predictions/transform/dense/bias" + input: "sub_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_807" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "add_680" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_808" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "add_681" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "cls/predictions/transform/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "cls/predictions/transform/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1086/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1086" + op: "Mul" + input: "Mul_1086/x" + input: "cls/predictions/transform/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1087/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1087" + op: "Mul" + input: "Mul_1087/x" + input: "clip_by_global_norm/clip_by_global_norm/_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_683" + op: "Add" + input: "Mul_1086" + input: "Mul_1087" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1088/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1088" + op: "Mul" + input: "Mul_1088/x" + input: "cls/predictions/transform/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_201" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1089/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1089" + op: "Mul" + input: "Mul_1089/x" + input: "Square_201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_684" + op: "Add" + input: "Mul_1088" + input: "Mul_1089" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_201" + op: "Sqrt" + input: "add_684" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_685/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_685" + op: "Add" + input: "Sqrt_201" + input: "add_685/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_202" + op: "RealDiv" + input: "add_683" + input: "add_685" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1090" + op: "Mul" + input: "add_2" + input: "truediv_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_202" + op: "Sub" + input: "cls/predictions/transform/LayerNorm/beta/read" + input: "mul_1090" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_809" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta" + input: "sub_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_810" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "add_683" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_811" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "add_684" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1091/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1091" + op: "Mul" + input: "Mul_1091/x" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1092/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1092" + op: "Mul" + input: "Mul_1092/x" + input: "clip_by_global_norm/clip_by_global_norm/_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_686" + op: "Add" + input: "Mul_1091" + input: "Mul_1092" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1093/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1093" + op: "Mul" + input: "Mul_1093/x" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_202" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1094/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1094" + op: "Mul" + input: "Mul_1094/x" + input: "Square_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_687" + op: "Add" + input: "Mul_1093" + input: "Mul_1094" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_202" + op: "Sqrt" + input: "add_687" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_688/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_688" + op: "Add" + input: "Sqrt_202" + input: "add_688/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_203" + op: "RealDiv" + input: "add_686" + input: "add_688" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1095" + op: "Mul" + input: "add_2" + input: "truediv_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_203" + op: "Sub" + input: "cls/predictions/transform/LayerNorm/gamma/read" + input: "mul_1095" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_812" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "sub_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_813" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "add_686" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_814" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "add_687" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 28996 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Initializer/zeros" + op: "Fill" + input: "cls/predictions/output_bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/output_bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/output_bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 28996 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Assign" + op: "Assign" + input: "cls/predictions/output_bias/adam_m" + input: "cls/predictions/output_bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/read" + op: "Identity" + input: "cls/predictions/output_bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 28996 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Initializer/zeros" + op: "Fill" + input: "cls/predictions/output_bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/output_bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/output_bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 28996 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Assign" + op: "Assign" + input: "cls/predictions/output_bias/adam_v" + input: "cls/predictions/output_bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/read" + op: "Identity" + input: "cls/predictions/output_bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "Mul_1096/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1096" + op: "Mul" + input: "Mul_1096/x" + input: "cls/predictions/output_bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "Mul_1097/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1097" + op: "Mul" + input: "Mul_1097/x" + input: "clip_by_global_norm/clip_by_global_norm/_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "add_689" + op: "Add" + input: "Mul_1096" + input: "Mul_1097" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "Mul_1098/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1098" + op: "Mul" + input: "Mul_1098/x" + input: "cls/predictions/output_bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "Square_203" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "Mul_1099/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1099" + op: "Mul" + input: "Mul_1099/x" + input: "Square_203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "add_690" + op: "Add" + input: "Mul_1098" + input: "Mul_1099" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "Sqrt_203" + op: "Sqrt" + input: "add_690" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "add_691/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_691" + op: "Add" + input: "Sqrt_203" + input: "add_691/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "truediv_204" + op: "RealDiv" + input: "add_689" + input: "add_691" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "mul_1100" + op: "Mul" + input: "add_2" + input: "truediv_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "sub_204" + op: "Sub" + input: "cls/predictions/output_bias/read" + input: "mul_1100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } +} +node { + name: "Assign_815" + op: "Assign" + input: "cls/predictions/output_bias" + input: "sub_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_816" + op: "Assign" + input: "cls/predictions/output_bias/adam_m" + input: "add_689" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_817" + op: "Assign" + input: "cls/predictions/output_bias/adam_v" + input: "add_690" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros" + op: "Fill" + input: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/shape_as_tensor" + input: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Assign" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_m" + input: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/read" + op: "Identity" + input: "cls/seq_relationship/output_weights/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros" + op: "Fill" + input: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/shape_as_tensor" + input: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Assign" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_v" + input: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/read" + op: "Identity" + input: "cls/seq_relationship/output_weights/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1101/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1101" + op: "Mul" + input: "Mul_1101/x" + input: "cls/seq_relationship/output_weights/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1102/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1102" + op: "Mul" + input: "Mul_1102/x" + input: "clip_by_global_norm/clip_by_global_norm/_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_692" + op: "Add" + input: "Mul_1101" + input: "Mul_1102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1103/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1103" + op: "Mul" + input: "Mul_1103/x" + input: "cls/seq_relationship/output_weights/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_204" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_1104/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1104" + op: "Mul" + input: "Mul_1104/x" + input: "Square_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_693" + op: "Add" + input: "Mul_1103" + input: "Mul_1104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_204" + op: "Sqrt" + input: "add_693" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_694/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_694" + op: "Add" + input: "Sqrt_204" + input: "add_694/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_205" + op: "RealDiv" + input: "add_692" + input: "add_694" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1105/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_1105" + op: "Mul" + input: "mul_1105/x" + input: "cls/seq_relationship/output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_695" + op: "Add" + input: "truediv_205" + input: "mul_1105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_1106" + op: "Mul" + input: "add_2" + input: "add_695" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_205" + op: "Sub" + input: "cls/seq_relationship/output_weights/read" + input: "mul_1106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_818" + op: "Assign" + input: "cls/seq_relationship/output_weights" + input: "sub_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_819" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_m" + input: "add_692" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_820" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_v" + input: "add_693" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m/Assign" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_m" + input: "cls/seq_relationship/output_bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m/read" + op: "Identity" + input: "cls/seq_relationship/output_bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v/Assign" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_v" + input: "cls/seq_relationship/output_bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v/read" + op: "Identity" + input: "cls/seq_relationship/output_bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_1107/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_1107" + op: "Mul" + input: "Mul_1107/x" + input: "cls/seq_relationship/output_bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_1108/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_1108" + op: "Mul" + input: "Mul_1108/x" + input: "clip_by_global_norm/clip_by_global_norm/_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "add_696" + op: "Add" + input: "Mul_1107" + input: "Mul_1108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_1109/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_1109" + op: "Mul" + input: "Mul_1109/x" + input: "cls/seq_relationship/output_bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Square_205" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_1110/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_1110" + op: "Mul" + input: "Mul_1110/x" + input: "Square_205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "add_697" + op: "Add" + input: "Mul_1109" + input: "Mul_1110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Sqrt_205" + op: "Sqrt" + input: "add_697" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "add_698/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_698" + op: "Add" + input: "Sqrt_205" + input: "add_698/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "truediv_206" + op: "RealDiv" + input: "add_696" + input: "add_698" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "mul_1111" + op: "Mul" + input: "add_2" + input: "truediv_206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "sub_206" + op: "Sub" + input: "cls/seq_relationship/output_bias/read" + input: "mul_1111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Assign_821" + op: "Assign" + input: "cls/seq_relationship/output_bias" + input: "sub_206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_822" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_m" + input: "add_696" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_823" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_v" + input: "add_697" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "group_deps" + op: "NoOp" + input: "^Assign_206" + input: "^Assign_207" + input: "^Assign_208" + input: "^Assign_209" + input: "^Assign_210" + input: "^Assign_211" + input: "^Assign_212" + input: "^Assign_213" + input: "^Assign_214" + input: "^Assign_215" + input: "^Assign_216" + input: "^Assign_217" + input: "^Assign_218" + input: "^Assign_219" + input: "^Assign_220" + input: "^Assign_221" + input: "^Assign_222" + input: "^Assign_223" + input: "^Assign_224" + input: "^Assign_225" + input: "^Assign_226" + input: "^Assign_227" + input: "^Assign_228" + input: "^Assign_229" + input: "^Assign_230" + input: "^Assign_231" + input: "^Assign_232" + input: "^Assign_233" + input: "^Assign_234" + input: "^Assign_235" + input: "^Assign_236" + input: "^Assign_237" + input: "^Assign_238" + input: "^Assign_239" + input: "^Assign_240" + input: "^Assign_241" + input: "^Assign_242" + input: "^Assign_243" + input: "^Assign_244" + input: "^Assign_245" + input: "^Assign_246" + input: "^Assign_247" + input: "^Assign_248" + input: "^Assign_249" + input: "^Assign_250" + input: "^Assign_251" + input: "^Assign_252" + input: "^Assign_253" + input: "^Assign_254" + input: "^Assign_255" + input: "^Assign_256" + input: "^Assign_257" + input: "^Assign_258" + input: "^Assign_259" + input: "^Assign_260" + input: "^Assign_261" + input: "^Assign_262" + input: "^Assign_263" + input: "^Assign_264" + input: "^Assign_265" + input: "^Assign_266" + input: "^Assign_267" + input: "^Assign_268" + input: "^Assign_269" + input: "^Assign_270" + input: "^Assign_271" + input: "^Assign_272" + input: "^Assign_273" + input: "^Assign_274" + input: "^Assign_275" + input: "^Assign_276" + input: "^Assign_277" + input: "^Assign_278" + input: "^Assign_279" + input: "^Assign_280" + input: "^Assign_281" + input: "^Assign_282" + input: "^Assign_283" + input: "^Assign_284" + input: "^Assign_285" + input: "^Assign_286" + input: "^Assign_287" + input: "^Assign_288" + input: "^Assign_289" + input: "^Assign_290" + input: "^Assign_291" + input: "^Assign_292" + input: "^Assign_293" + input: "^Assign_294" + input: "^Assign_295" + input: "^Assign_296" + input: "^Assign_297" + input: "^Assign_298" + input: "^Assign_299" + input: "^Assign_300" + input: "^Assign_301" + input: "^Assign_302" + input: "^Assign_303" + input: "^Assign_304" + input: "^Assign_305" + input: "^Assign_306" + input: "^Assign_307" + input: "^Assign_308" + input: "^Assign_309" + input: "^Assign_310" + input: "^Assign_311" + input: "^Assign_312" + input: "^Assign_313" + input: "^Assign_314" + input: "^Assign_315" + input: "^Assign_316" + input: "^Assign_317" + input: "^Assign_318" + input: "^Assign_319" + input: "^Assign_320" + input: "^Assign_321" + input: "^Assign_322" + input: "^Assign_323" + input: "^Assign_324" + input: "^Assign_325" + input: "^Assign_326" + input: "^Assign_327" + input: "^Assign_328" + input: "^Assign_329" + input: "^Assign_330" + input: "^Assign_331" + input: "^Assign_332" + input: "^Assign_333" + input: "^Assign_334" + input: "^Assign_335" + input: "^Assign_336" + input: "^Assign_337" + input: "^Assign_338" + input: "^Assign_339" + input: "^Assign_340" + input: "^Assign_341" + input: "^Assign_342" + input: "^Assign_343" + input: "^Assign_344" + input: "^Assign_345" + input: "^Assign_346" + input: "^Assign_347" + input: "^Assign_348" + input: "^Assign_349" + input: "^Assign_350" + input: "^Assign_351" + input: "^Assign_352" + input: "^Assign_353" + input: "^Assign_354" + input: "^Assign_355" + input: "^Assign_356" + input: "^Assign_357" + input: "^Assign_358" + input: "^Assign_359" + input: "^Assign_360" + input: "^Assign_361" + input: "^Assign_362" + input: "^Assign_363" + input: "^Assign_364" + input: "^Assign_365" + input: "^Assign_366" + input: "^Assign_367" + input: "^Assign_368" + input: "^Assign_369" + input: "^Assign_370" + input: "^Assign_371" + input: "^Assign_372" + input: "^Assign_373" + input: "^Assign_374" + input: "^Assign_375" + input: "^Assign_376" + input: "^Assign_377" + input: "^Assign_378" + input: "^Assign_379" + input: "^Assign_380" + input: "^Assign_381" + input: "^Assign_382" + input: "^Assign_383" + input: "^Assign_384" + input: "^Assign_385" + input: "^Assign_386" + input: "^Assign_387" + input: "^Assign_388" + input: "^Assign_389" + input: "^Assign_390" + input: "^Assign_391" + input: "^Assign_392" + input: "^Assign_393" + input: "^Assign_394" + input: "^Assign_395" + input: "^Assign_396" + input: "^Assign_397" + input: "^Assign_398" + input: "^Assign_399" + input: "^Assign_400" + input: "^Assign_401" + input: "^Assign_402" + input: "^Assign_403" + input: "^Assign_404" + input: "^Assign_405" + input: "^Assign_406" + input: "^Assign_407" + input: "^Assign_408" + input: "^Assign_409" + input: "^Assign_410" + input: "^Assign_411" + input: "^Assign_412" + input: "^Assign_413" + input: "^Assign_414" + input: "^Assign_415" + input: "^Assign_416" + input: "^Assign_417" + input: "^Assign_418" + input: "^Assign_419" + input: "^Assign_420" + input: "^Assign_421" + input: "^Assign_422" + input: "^Assign_423" + input: "^Assign_424" + input: "^Assign_425" + input: "^Assign_426" + input: "^Assign_427" + input: "^Assign_428" + input: "^Assign_429" + input: "^Assign_430" + input: "^Assign_431" + input: "^Assign_432" + input: "^Assign_433" + input: "^Assign_434" + input: "^Assign_435" + input: "^Assign_436" + input: "^Assign_437" + input: "^Assign_438" + input: "^Assign_439" + input: "^Assign_440" + input: "^Assign_441" + input: "^Assign_442" + input: "^Assign_443" + input: "^Assign_444" + input: "^Assign_445" + input: "^Assign_446" + input: "^Assign_447" + input: "^Assign_448" + input: "^Assign_449" + input: "^Assign_450" + input: "^Assign_451" + input: "^Assign_452" + input: "^Assign_453" + input: "^Assign_454" + input: "^Assign_455" + input: "^Assign_456" + input: "^Assign_457" + input: "^Assign_458" + input: "^Assign_459" + input: "^Assign_460" + input: "^Assign_461" + input: "^Assign_462" + input: "^Assign_463" + input: "^Assign_464" + input: "^Assign_465" + input: "^Assign_466" + input: "^Assign_467" + input: "^Assign_468" + input: "^Assign_469" + input: "^Assign_470" + input: "^Assign_471" + input: "^Assign_472" + input: "^Assign_473" + input: "^Assign_474" + input: "^Assign_475" + input: "^Assign_476" + input: "^Assign_477" + input: "^Assign_478" + input: "^Assign_479" + input: "^Assign_480" + input: "^Assign_481" + input: "^Assign_482" + input: "^Assign_483" + input: "^Assign_484" + input: "^Assign_485" + input: "^Assign_486" + input: "^Assign_487" + input: "^Assign_488" + input: "^Assign_489" + input: "^Assign_490" + input: "^Assign_491" + input: "^Assign_492" + input: "^Assign_493" + input: "^Assign_494" + input: "^Assign_495" + input: "^Assign_496" + input: "^Assign_497" + input: "^Assign_498" + input: "^Assign_499" + input: "^Assign_500" + input: "^Assign_501" + input: "^Assign_502" + input: "^Assign_503" + input: "^Assign_504" + input: "^Assign_505" + input: "^Assign_506" + input: "^Assign_507" + input: "^Assign_508" + input: "^Assign_509" + input: "^Assign_510" + input: "^Assign_511" + input: "^Assign_512" + input: "^Assign_513" + input: "^Assign_514" + input: "^Assign_515" + input: "^Assign_516" + input: "^Assign_517" + input: "^Assign_518" + input: "^Assign_519" + input: "^Assign_520" + input: "^Assign_521" + input: "^Assign_522" + input: "^Assign_523" + input: "^Assign_524" + input: "^Assign_525" + input: "^Assign_526" + input: "^Assign_527" + input: "^Assign_528" + input: "^Assign_529" + input: "^Assign_530" + input: "^Assign_531" + input: "^Assign_532" + input: "^Assign_533" + input: "^Assign_534" + input: "^Assign_535" + input: "^Assign_536" + input: "^Assign_537" + input: "^Assign_538" + input: "^Assign_539" + input: "^Assign_540" + input: "^Assign_541" + input: "^Assign_542" + input: "^Assign_543" + input: "^Assign_544" + input: "^Assign_545" + input: "^Assign_546" + input: "^Assign_547" + input: "^Assign_548" + input: "^Assign_549" + input: "^Assign_550" + input: "^Assign_551" + input: "^Assign_552" + input: "^Assign_553" + input: "^Assign_554" + input: "^Assign_555" + input: "^Assign_556" + input: "^Assign_557" + input: "^Assign_558" + input: "^Assign_559" + input: "^Assign_560" + input: "^Assign_561" + input: "^Assign_562" + input: "^Assign_563" + input: "^Assign_564" + input: "^Assign_565" + input: "^Assign_566" + input: "^Assign_567" + input: "^Assign_568" + input: "^Assign_569" + input: "^Assign_570" + input: "^Assign_571" + input: "^Assign_572" + input: "^Assign_573" + input: "^Assign_574" + input: "^Assign_575" + input: "^Assign_576" + input: "^Assign_577" + input: "^Assign_578" + input: "^Assign_579" + input: "^Assign_580" + input: "^Assign_581" + input: "^Assign_582" + input: "^Assign_583" + input: "^Assign_584" + input: "^Assign_585" + input: "^Assign_586" + input: "^Assign_587" + input: "^Assign_588" + input: "^Assign_589" + input: "^Assign_590" + input: "^Assign_591" + input: "^Assign_592" + input: "^Assign_593" + input: "^Assign_594" + input: "^Assign_595" + input: "^Assign_596" + input: "^Assign_597" + input: "^Assign_598" + input: "^Assign_599" + input: "^Assign_600" + input: "^Assign_601" + input: "^Assign_602" + input: "^Assign_603" + input: "^Assign_604" + input: "^Assign_605" + input: "^Assign_606" + input: "^Assign_607" + input: "^Assign_608" + input: "^Assign_609" + input: "^Assign_610" + input: "^Assign_611" + input: "^Assign_612" + input: "^Assign_613" + input: "^Assign_614" + input: "^Assign_615" + input: "^Assign_616" + input: "^Assign_617" + input: "^Assign_618" + input: "^Assign_619" + input: "^Assign_620" + input: "^Assign_621" + input: "^Assign_622" + input: "^Assign_623" + input: "^Assign_624" + input: "^Assign_625" + input: "^Assign_626" + input: "^Assign_627" + input: "^Assign_628" + input: "^Assign_629" + input: "^Assign_630" + input: "^Assign_631" + input: "^Assign_632" + input: "^Assign_633" + input: "^Assign_634" + input: "^Assign_635" + input: "^Assign_636" + input: "^Assign_637" + input: "^Assign_638" + input: "^Assign_639" + input: "^Assign_640" + input: "^Assign_641" + input: "^Assign_642" + input: "^Assign_643" + input: "^Assign_644" + input: "^Assign_645" + input: "^Assign_646" + input: "^Assign_647" + input: "^Assign_648" + input: "^Assign_649" + input: "^Assign_650" + input: "^Assign_651" + input: "^Assign_652" + input: "^Assign_653" + input: "^Assign_654" + input: "^Assign_655" + input: "^Assign_656" + input: "^Assign_657" + input: "^Assign_658" + input: "^Assign_659" + input: "^Assign_660" + input: "^Assign_661" + input: "^Assign_662" + input: "^Assign_663" + input: "^Assign_664" + input: "^Assign_665" + input: "^Assign_666" + input: "^Assign_667" + input: "^Assign_668" + input: "^Assign_669" + input: "^Assign_670" + input: "^Assign_671" + input: "^Assign_672" + input: "^Assign_673" + input: "^Assign_674" + input: "^Assign_675" + input: "^Assign_676" + input: "^Assign_677" + input: "^Assign_678" + input: "^Assign_679" + input: "^Assign_680" + input: "^Assign_681" + input: "^Assign_682" + input: "^Assign_683" + input: "^Assign_684" + input: "^Assign_685" + input: "^Assign_686" + input: "^Assign_687" + input: "^Assign_688" + input: "^Assign_689" + input: "^Assign_690" + input: "^Assign_691" + input: "^Assign_692" + input: "^Assign_693" + input: "^Assign_694" + input: "^Assign_695" + input: "^Assign_696" + input: "^Assign_697" + input: "^Assign_698" + input: "^Assign_699" + input: "^Assign_700" + input: "^Assign_701" + input: "^Assign_702" + input: "^Assign_703" + input: "^Assign_704" + input: "^Assign_705" + input: "^Assign_706" + input: "^Assign_707" + input: "^Assign_708" + input: "^Assign_709" + input: "^Assign_710" + input: "^Assign_711" + input: "^Assign_712" + input: "^Assign_713" + input: "^Assign_714" + input: "^Assign_715" + input: "^Assign_716" + input: "^Assign_717" + input: "^Assign_718" + input: "^Assign_719" + input: "^Assign_720" + input: "^Assign_721" + input: "^Assign_722" + input: "^Assign_723" + input: "^Assign_724" + input: "^Assign_725" + input: "^Assign_726" + input: "^Assign_727" + input: "^Assign_728" + input: "^Assign_729" + input: "^Assign_730" + input: "^Assign_731" + input: "^Assign_732" + input: "^Assign_733" + input: "^Assign_734" + input: "^Assign_735" + input: "^Assign_736" + input: "^Assign_737" + input: "^Assign_738" + input: "^Assign_739" + input: "^Assign_740" + input: "^Assign_741" + input: "^Assign_742" + input: "^Assign_743" + input: "^Assign_744" + input: "^Assign_745" + input: "^Assign_746" + input: "^Assign_747" + input: "^Assign_748" + input: "^Assign_749" + input: "^Assign_750" + input: "^Assign_751" + input: "^Assign_752" + input: "^Assign_753" + input: "^Assign_754" + input: "^Assign_755" + input: "^Assign_756" + input: "^Assign_757" + input: "^Assign_758" + input: "^Assign_759" + input: "^Assign_760" + input: "^Assign_761" + input: "^Assign_762" + input: "^Assign_763" + input: "^Assign_764" + input: "^Assign_765" + input: "^Assign_766" + input: "^Assign_767" + input: "^Assign_768" + input: "^Assign_769" + input: "^Assign_770" + input: "^Assign_771" + input: "^Assign_772" + input: "^Assign_773" + input: "^Assign_774" + input: "^Assign_775" + input: "^Assign_776" + input: "^Assign_777" + input: "^Assign_778" + input: "^Assign_779" + input: "^Assign_780" + input: "^Assign_781" + input: "^Assign_782" + input: "^Assign_783" + input: "^Assign_784" + input: "^Assign_785" + input: "^Assign_786" + input: "^Assign_787" + input: "^Assign_788" + input: "^Assign_789" + input: "^Assign_790" + input: "^Assign_791" + input: "^Assign_792" + input: "^Assign_793" + input: "^Assign_794" + input: "^Assign_795" + input: "^Assign_796" + input: "^Assign_797" + input: "^Assign_798" + input: "^Assign_799" + input: "^Assign_800" + input: "^Assign_801" + input: "^Assign_802" + input: "^Assign_803" + input: "^Assign_804" + input: "^Assign_805" + input: "^Assign_806" + input: "^Assign_807" + input: "^Assign_808" + input: "^Assign_809" + input: "^Assign_810" + input: "^Assign_811" + input: "^Assign_812" + input: "^Assign_813" + input: "^Assign_814" + input: "^Assign_815" + input: "^Assign_816" + input: "^Assign_817" + input: "^Assign_818" + input: "^Assign_819" + input: "^Assign_820" + input: "^Assign_821" + input: "^Assign_822" + input: "^Assign_823" +} +node { + name: "ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "add_699/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } +} +node { + name: "add_699" + op: "Add" + input: "ReadVariableOp" + input: "add_699/y" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "AssignVariableOp" + op: "AssignVariableOp" + input: "global_step" + input: "add_699" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "ReadVariableOp_1" + op: "ReadVariableOp" + input: "global_step" + input: "^AssignVariableOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "group_deps_1" + op: "NoOp" + input: "^ReadVariableOp_1" + input: "^group_deps" +} +node { + name: "loss/tags" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "loss" + } + } + } +} +node { + name: "loss" + op: "ScalarSummary" + input: "loss/tags" + input: "add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "init" + op: "NoOp" + input: "^Assign" + input: "^Assign_1" + input: "^Assign_10" + input: "^Assign_100" + input: "^Assign_101" + input: "^Assign_102" + input: "^Assign_103" + input: "^Assign_104" + input: "^Assign_105" + input: "^Assign_106" + input: "^Assign_107" + input: "^Assign_108" + input: "^Assign_109" + input: "^Assign_11" + input: "^Assign_110" + input: "^Assign_111" + input: "^Assign_112" + input: "^Assign_113" + input: "^Assign_114" + input: "^Assign_115" + input: "^Assign_116" + input: "^Assign_117" + input: "^Assign_118" + input: "^Assign_119" + input: "^Assign_12" + input: "^Assign_120" + input: "^Assign_121" + input: "^Assign_122" + input: "^Assign_123" + input: "^Assign_124" + input: "^Assign_125" + input: "^Assign_126" + input: "^Assign_127" + input: "^Assign_128" + input: "^Assign_129" + input: "^Assign_13" + input: "^Assign_130" + input: "^Assign_131" + input: "^Assign_132" + input: "^Assign_133" + input: "^Assign_134" + input: "^Assign_135" + input: "^Assign_136" + input: "^Assign_137" + input: "^Assign_138" + input: "^Assign_139" + input: "^Assign_14" + input: "^Assign_140" + input: "^Assign_141" + input: "^Assign_142" + input: "^Assign_143" + input: "^Assign_144" + input: "^Assign_145" + input: "^Assign_146" + input: "^Assign_147" + input: "^Assign_148" + input: "^Assign_149" + input: "^Assign_15" + input: "^Assign_150" + input: "^Assign_151" + input: "^Assign_152" + input: "^Assign_153" + input: "^Assign_154" + input: "^Assign_155" + input: "^Assign_156" + input: "^Assign_157" + input: "^Assign_158" + input: "^Assign_159" + input: "^Assign_16" + input: "^Assign_160" + input: "^Assign_161" + input: "^Assign_162" + input: "^Assign_163" + input: "^Assign_164" + input: "^Assign_165" + input: "^Assign_166" + input: "^Assign_167" + input: "^Assign_168" + input: "^Assign_169" + input: "^Assign_17" + input: "^Assign_170" + input: "^Assign_171" + input: "^Assign_172" + input: "^Assign_173" + input: "^Assign_174" + input: "^Assign_175" + input: "^Assign_176" + input: "^Assign_177" + input: "^Assign_178" + input: "^Assign_179" + input: "^Assign_18" + input: "^Assign_180" + input: "^Assign_181" + input: "^Assign_182" + input: "^Assign_183" + input: "^Assign_184" + input: "^Assign_185" + input: "^Assign_186" + input: "^Assign_187" + input: "^Assign_188" + input: "^Assign_189" + input: "^Assign_19" + input: "^Assign_190" + input: "^Assign_191" + input: "^Assign_192" + input: "^Assign_193" + input: "^Assign_194" + input: "^Assign_195" + input: "^Assign_196" + input: "^Assign_197" + input: "^Assign_198" + input: "^Assign_199" + input: "^Assign_2" + input: "^Assign_20" + input: "^Assign_200" + input: "^Assign_201" + input: "^Assign_202" + input: "^Assign_203" + input: "^Assign_204" + input: "^Assign_205" + input: "^Assign_21" + input: "^Assign_22" + input: "^Assign_23" + input: "^Assign_24" + input: "^Assign_25" + input: "^Assign_26" + input: "^Assign_27" + input: "^Assign_28" + input: "^Assign_29" + input: "^Assign_3" + input: "^Assign_30" + input: "^Assign_31" + input: "^Assign_32" + input: "^Assign_33" + input: "^Assign_34" + input: "^Assign_35" + input: "^Assign_36" + input: "^Assign_37" + input: "^Assign_38" + input: "^Assign_39" + input: "^Assign_4" + input: "^Assign_40" + input: "^Assign_41" + input: "^Assign_42" + input: "^Assign_43" + input: "^Assign_44" + input: "^Assign_45" + input: "^Assign_46" + input: "^Assign_47" + input: "^Assign_48" + input: "^Assign_49" + input: "^Assign_5" + input: "^Assign_50" + input: "^Assign_51" + input: "^Assign_52" + input: "^Assign_53" + input: "^Assign_54" + input: "^Assign_55" + input: "^Assign_56" + input: "^Assign_57" + input: "^Assign_58" + input: "^Assign_59" + input: "^Assign_6" + input: "^Assign_60" + input: "^Assign_61" + input: "^Assign_62" + input: "^Assign_63" + input: "^Assign_64" + input: "^Assign_65" + input: "^Assign_66" + input: "^Assign_67" + input: "^Assign_68" + input: "^Assign_69" + input: "^Assign_7" + input: "^Assign_70" + input: "^Assign_71" + input: "^Assign_72" + input: "^Assign_73" + input: "^Assign_74" + input: "^Assign_75" + input: "^Assign_76" + input: "^Assign_77" + input: "^Assign_78" + input: "^Assign_79" + input: "^Assign_8" + input: "^Assign_80" + input: "^Assign_81" + input: "^Assign_82" + input: "^Assign_83" + input: "^Assign_84" + input: "^Assign_85" + input: "^Assign_86" + input: "^Assign_87" + input: "^Assign_88" + input: "^Assign_89" + input: "^Assign_9" + input: "^Assign_90" + input: "^Assign_91" + input: "^Assign_92" + input: "^Assign_93" + input: "^Assign_94" + input: "^Assign_95" + input: "^Assign_96" + input: "^Assign_97" + input: "^Assign_98" + input: "^Assign_99" + input: "^bert/embeddings/LayerNorm/beta/adam_m/Assign" + input: "^bert/embeddings/LayerNorm/beta/adam_v/Assign" + input: "^bert/embeddings/LayerNorm/gamma/adam_m/Assign" + input: "^bert/embeddings/LayerNorm/gamma/adam_v/Assign" + input: "^bert/embeddings/position_embeddings/adam_m/Assign" + input: "^bert/embeddings/position_embeddings/adam_v/Assign" + input: "^bert/embeddings/token_type_embeddings/adam_m/Assign" + input: "^bert/embeddings/token_type_embeddings/adam_v/Assign" + input: "^bert/embeddings/word_embeddings/adam_m/Assign" + input: "^bert/embeddings/word_embeddings/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_0/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_0/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_0/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_0/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_0/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_1/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_1/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_1/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_1/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_1/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_10/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_10/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_10/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_10/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_10/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_11/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_11/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_11/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_11/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_11/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_2/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_2/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_2/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_2/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_2/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_3/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_3/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_3/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_3/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_3/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_4/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_4/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_4/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_4/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_4/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_5/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_5/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_5/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_5/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_5/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_6/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_6/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_6/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_6/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_6/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_7/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_7/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_7/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_7/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_7/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_8/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_8/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_8/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_8/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_8/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/key/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/key/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/query/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/query/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/value/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/value/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/attention/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/attention/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/layer_9/output/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/layer_9/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/layer_9/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/layer_9/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/layer_9/output/dense/kernel/adam_v/Assign" + input: "^bert/pooler/dense/bias/adam_m/Assign" + input: "^bert/pooler/dense/bias/adam_v/Assign" + input: "^bert/pooler/dense/kernel/adam_m/Assign" + input: "^bert/pooler/dense/kernel/adam_v/Assign" + input: "^cls/predictions/output_bias/adam_m/Assign" + input: "^cls/predictions/output_bias/adam_v/Assign" + input: "^cls/predictions/transform/LayerNorm/beta/adam_m/Assign" + input: "^cls/predictions/transform/LayerNorm/beta/adam_v/Assign" + input: "^cls/predictions/transform/LayerNorm/gamma/adam_m/Assign" + input: "^cls/predictions/transform/LayerNorm/gamma/adam_v/Assign" + input: "^cls/predictions/transform/dense/bias/adam_m/Assign" + input: "^cls/predictions/transform/dense/bias/adam_v/Assign" + input: "^cls/predictions/transform/dense/kernel/adam_m/Assign" + input: "^cls/predictions/transform/dense/kernel/adam_v/Assign" + input: "^cls/seq_relationship/output_bias/adam_m/Assign" + input: "^cls/seq_relationship/output_bias/adam_v/Assign" + input: "^cls/seq_relationship/output_weights/adam_m/Assign" + input: "^cls/seq_relationship/output_weights/adam_v/Assign" + input: "^global_step/Assign" +} +node { + name: "init_1" + op: "NoOp" +} +node { + name: "group_deps_2" + op: "NoOp" + input: "^init" + input: "^init_1" +} +node { + name: "report_uninitialized_variables/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_1" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_2" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_3" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_4" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_5" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_6" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_7" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_8" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_9" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_10" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_11" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_12" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_13" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_14" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_15" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_16" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_17" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_18" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_19" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_20" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_21" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_22" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_23" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_24" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_25" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_26" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_27" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_28" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_29" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_30" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_31" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_32" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_33" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_34" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_35" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_36" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_37" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_38" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_39" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_40" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_41" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_42" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_43" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_44" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_45" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_46" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_47" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_48" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_49" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_50" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_51" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_52" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_53" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_54" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_55" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_56" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_57" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_58" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_59" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_60" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_61" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_62" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_63" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_64" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_65" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_66" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_67" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_68" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_69" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_70" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_71" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_72" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_73" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_74" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_75" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_76" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_77" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_78" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_79" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_80" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_81" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_82" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_83" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_84" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_85" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_86" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_87" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_88" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_89" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_90" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_91" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_92" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_93" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_94" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_95" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_96" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_97" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_98" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_99" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_100" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_101" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_102" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_103" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_104" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_105" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_106" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_107" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_108" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_109" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_110" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_111" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_112" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_113" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_114" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_115" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_116" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_117" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_118" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_119" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_120" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_121" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_122" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_123" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_124" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_125" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_126" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_127" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_128" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_129" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_130" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_131" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_132" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_133" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_134" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_135" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_136" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_137" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_138" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_139" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_140" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_141" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_142" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_143" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_144" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_145" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_146" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_147" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_148" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_149" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_150" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_151" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_152" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_153" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_154" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_155" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_156" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_157" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_158" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_159" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_160" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_161" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_162" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_163" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_164" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_165" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_166" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_167" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_168" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_169" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_170" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_171" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_172" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_173" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_174" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_175" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_176" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_177" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_178" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_179" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_180" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_181" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_182" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_183" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_184" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_185" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_186" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_187" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_188" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_189" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_190" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_191" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_192" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_193" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_194" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_195" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_196" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_197" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_198" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_199" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_200" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_201" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_202" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_203" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_204" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_205" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_206" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_207" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_208" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_209" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_210" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_211" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_212" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_213" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_214" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_215" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_216" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_217" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_218" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_219" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_220" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_221" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_222" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_223" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_224" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_225" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_226" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_227" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_228" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_229" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_230" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_231" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_232" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_233" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_234" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_235" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_236" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_237" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_238" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_239" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_240" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_241" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_242" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_243" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_244" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_245" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_246" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_247" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_248" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_249" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_250" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_251" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_252" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_253" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_254" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_255" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_256" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_257" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_258" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_259" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_260" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_261" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_262" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_263" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_264" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_265" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_266" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_267" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_268" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_269" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_270" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_271" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_272" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_273" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_274" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_275" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_276" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_277" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_278" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_279" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_280" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_281" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_282" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_283" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_284" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_285" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_286" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_287" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_288" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_289" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_290" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_291" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_292" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_293" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_294" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_295" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_296" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_297" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_298" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_299" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_300" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_301" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_302" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_303" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_304" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_305" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_306" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_307" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_308" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_309" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_310" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_311" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_312" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_313" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_314" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_315" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_316" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_317" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_318" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_319" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_320" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_321" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_322" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_323" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_324" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_325" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_326" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_327" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_328" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_329" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_330" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_331" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_332" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_333" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_334" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_335" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_336" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_337" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_338" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_339" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_340" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_341" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_342" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_343" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_344" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_345" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_346" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_347" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_348" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_349" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_350" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_351" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_352" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_353" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_354" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_355" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_356" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_357" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_358" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_359" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_360" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_361" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_362" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_363" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_364" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_365" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_366" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_367" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_368" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_369" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_370" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_371" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_372" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_373" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_374" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_375" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_376" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_377" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_378" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_379" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_380" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_381" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_382" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_383" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_384" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_385" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_386" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_387" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_388" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_389" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_390" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_391" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_392" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_393" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_394" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_395" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_396" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_397" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_398" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_399" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_400" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_401" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_402" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_403" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_404" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_405" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_406" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_407" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_408" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_409" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_410" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_411" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_412" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_413" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_414" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_415" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_416" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_417" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_418" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_419" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_420" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_421" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_422" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_423" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_424" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_425" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_426" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_427" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_428" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_429" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_430" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_431" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_432" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_433" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_434" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_435" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_436" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_437" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_438" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_439" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_440" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_441" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_442" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_443" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_444" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_445" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_446" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_447" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_448" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_449" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_450" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_451" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_452" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_453" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_454" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_455" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_456" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_457" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_458" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_459" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_460" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_461" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_462" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_463" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_464" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_465" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_466" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_467" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_468" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_469" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_470" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_471" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_472" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_473" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_474" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_475" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_476" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_477" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_478" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_479" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_480" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_481" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_482" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_483" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_484" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_485" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_486" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_487" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_488" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_489" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_490" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_491" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_492" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_493" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_494" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_495" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_496" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_497" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_498" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_499" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_500" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_501" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_502" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_503" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_504" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_505" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_506" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_507" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_508" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_509" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_510" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_511" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_512" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_513" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_514" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_515" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_516" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_517" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_518" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_519" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_520" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_521" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_522" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_523" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_524" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_525" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_526" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_527" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_528" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_529" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_530" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_531" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_532" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_533" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_534" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_535" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_536" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_537" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_538" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_539" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_540" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_541" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_542" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_543" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_544" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_545" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_546" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_547" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_548" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_549" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_550" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_551" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_552" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_553" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_554" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_555" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_556" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_557" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_558" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_559" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_560" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_561" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_562" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_563" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_564" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_565" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_566" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_567" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_568" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_569" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_570" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_571" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_572" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_573" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_574" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_575" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_576" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_577" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_578" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_579" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_580" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_581" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_582" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_583" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_584" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_585" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_586" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_587" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_588" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_589" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_590" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_591" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_592" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_593" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_594" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_595" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_596" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_597" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_598" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_599" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_600" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_601" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_602" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_603" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_604" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_605" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_606" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_607" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_608" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_609" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_610" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_611" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_612" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_613" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_614" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_615" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_616" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_617" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/stack" + op: "Pack" + input: "report_uninitialized_variables/VarIsInitializedOp" + input: "report_uninitialized_variables/IsVariableInitialized" + input: "report_uninitialized_variables/IsVariableInitialized_1" + input: "report_uninitialized_variables/IsVariableInitialized_2" + input: "report_uninitialized_variables/IsVariableInitialized_3" + input: "report_uninitialized_variables/IsVariableInitialized_4" + input: "report_uninitialized_variables/IsVariableInitialized_5" + input: "report_uninitialized_variables/IsVariableInitialized_6" + input: "report_uninitialized_variables/IsVariableInitialized_7" + input: "report_uninitialized_variables/IsVariableInitialized_8" + input: "report_uninitialized_variables/IsVariableInitialized_9" + input: "report_uninitialized_variables/IsVariableInitialized_10" + input: "report_uninitialized_variables/IsVariableInitialized_11" + input: "report_uninitialized_variables/IsVariableInitialized_12" + input: "report_uninitialized_variables/IsVariableInitialized_13" + input: "report_uninitialized_variables/IsVariableInitialized_14" + input: "report_uninitialized_variables/IsVariableInitialized_15" + input: "report_uninitialized_variables/IsVariableInitialized_16" + input: "report_uninitialized_variables/IsVariableInitialized_17" + input: "report_uninitialized_variables/IsVariableInitialized_18" + input: "report_uninitialized_variables/IsVariableInitialized_19" + input: "report_uninitialized_variables/IsVariableInitialized_20" + input: "report_uninitialized_variables/IsVariableInitialized_21" + input: "report_uninitialized_variables/IsVariableInitialized_22" + input: "report_uninitialized_variables/IsVariableInitialized_23" + input: "report_uninitialized_variables/IsVariableInitialized_24" + input: "report_uninitialized_variables/IsVariableInitialized_25" + input: "report_uninitialized_variables/IsVariableInitialized_26" + input: "report_uninitialized_variables/IsVariableInitialized_27" + input: "report_uninitialized_variables/IsVariableInitialized_28" + input: "report_uninitialized_variables/IsVariableInitialized_29" + input: "report_uninitialized_variables/IsVariableInitialized_30" + input: "report_uninitialized_variables/IsVariableInitialized_31" + input: "report_uninitialized_variables/IsVariableInitialized_32" + input: "report_uninitialized_variables/IsVariableInitialized_33" + input: "report_uninitialized_variables/IsVariableInitialized_34" + input: "report_uninitialized_variables/IsVariableInitialized_35" + input: "report_uninitialized_variables/IsVariableInitialized_36" + input: "report_uninitialized_variables/IsVariableInitialized_37" + input: "report_uninitialized_variables/IsVariableInitialized_38" + input: "report_uninitialized_variables/IsVariableInitialized_39" + input: "report_uninitialized_variables/IsVariableInitialized_40" + input: "report_uninitialized_variables/IsVariableInitialized_41" + input: "report_uninitialized_variables/IsVariableInitialized_42" + input: "report_uninitialized_variables/IsVariableInitialized_43" + input: "report_uninitialized_variables/IsVariableInitialized_44" + input: "report_uninitialized_variables/IsVariableInitialized_45" + input: "report_uninitialized_variables/IsVariableInitialized_46" + input: "report_uninitialized_variables/IsVariableInitialized_47" + input: "report_uninitialized_variables/IsVariableInitialized_48" + input: "report_uninitialized_variables/IsVariableInitialized_49" + input: "report_uninitialized_variables/IsVariableInitialized_50" + input: "report_uninitialized_variables/IsVariableInitialized_51" + input: "report_uninitialized_variables/IsVariableInitialized_52" + input: "report_uninitialized_variables/IsVariableInitialized_53" + input: "report_uninitialized_variables/IsVariableInitialized_54" + input: "report_uninitialized_variables/IsVariableInitialized_55" + input: "report_uninitialized_variables/IsVariableInitialized_56" + input: "report_uninitialized_variables/IsVariableInitialized_57" + input: "report_uninitialized_variables/IsVariableInitialized_58" + input: "report_uninitialized_variables/IsVariableInitialized_59" + input: "report_uninitialized_variables/IsVariableInitialized_60" + input: "report_uninitialized_variables/IsVariableInitialized_61" + input: "report_uninitialized_variables/IsVariableInitialized_62" + input: "report_uninitialized_variables/IsVariableInitialized_63" + input: "report_uninitialized_variables/IsVariableInitialized_64" + input: "report_uninitialized_variables/IsVariableInitialized_65" + input: "report_uninitialized_variables/IsVariableInitialized_66" + input: "report_uninitialized_variables/IsVariableInitialized_67" + input: "report_uninitialized_variables/IsVariableInitialized_68" + input: "report_uninitialized_variables/IsVariableInitialized_69" + input: "report_uninitialized_variables/IsVariableInitialized_70" + input: "report_uninitialized_variables/IsVariableInitialized_71" + input: "report_uninitialized_variables/IsVariableInitialized_72" + input: "report_uninitialized_variables/IsVariableInitialized_73" + input: "report_uninitialized_variables/IsVariableInitialized_74" + input: "report_uninitialized_variables/IsVariableInitialized_75" + input: "report_uninitialized_variables/IsVariableInitialized_76" + input: "report_uninitialized_variables/IsVariableInitialized_77" + input: "report_uninitialized_variables/IsVariableInitialized_78" + input: "report_uninitialized_variables/IsVariableInitialized_79" + input: "report_uninitialized_variables/IsVariableInitialized_80" + input: "report_uninitialized_variables/IsVariableInitialized_81" + input: "report_uninitialized_variables/IsVariableInitialized_82" + input: "report_uninitialized_variables/IsVariableInitialized_83" + input: "report_uninitialized_variables/IsVariableInitialized_84" + input: "report_uninitialized_variables/IsVariableInitialized_85" + input: "report_uninitialized_variables/IsVariableInitialized_86" + input: "report_uninitialized_variables/IsVariableInitialized_87" + input: "report_uninitialized_variables/IsVariableInitialized_88" + input: "report_uninitialized_variables/IsVariableInitialized_89" + input: "report_uninitialized_variables/IsVariableInitialized_90" + input: "report_uninitialized_variables/IsVariableInitialized_91" + input: "report_uninitialized_variables/IsVariableInitialized_92" + input: "report_uninitialized_variables/IsVariableInitialized_93" + input: "report_uninitialized_variables/IsVariableInitialized_94" + input: "report_uninitialized_variables/IsVariableInitialized_95" + input: "report_uninitialized_variables/IsVariableInitialized_96" + input: "report_uninitialized_variables/IsVariableInitialized_97" + input: "report_uninitialized_variables/IsVariableInitialized_98" + input: "report_uninitialized_variables/IsVariableInitialized_99" + input: "report_uninitialized_variables/IsVariableInitialized_100" + input: "report_uninitialized_variables/IsVariableInitialized_101" + input: "report_uninitialized_variables/IsVariableInitialized_102" + input: "report_uninitialized_variables/IsVariableInitialized_103" + input: "report_uninitialized_variables/IsVariableInitialized_104" + input: "report_uninitialized_variables/IsVariableInitialized_105" + input: "report_uninitialized_variables/IsVariableInitialized_106" + input: "report_uninitialized_variables/IsVariableInitialized_107" + input: "report_uninitialized_variables/IsVariableInitialized_108" + input: "report_uninitialized_variables/IsVariableInitialized_109" + input: "report_uninitialized_variables/IsVariableInitialized_110" + input: "report_uninitialized_variables/IsVariableInitialized_111" + input: "report_uninitialized_variables/IsVariableInitialized_112" + input: "report_uninitialized_variables/IsVariableInitialized_113" + input: "report_uninitialized_variables/IsVariableInitialized_114" + input: "report_uninitialized_variables/IsVariableInitialized_115" + input: "report_uninitialized_variables/IsVariableInitialized_116" + input: "report_uninitialized_variables/IsVariableInitialized_117" + input: "report_uninitialized_variables/IsVariableInitialized_118" + input: "report_uninitialized_variables/IsVariableInitialized_119" + input: "report_uninitialized_variables/IsVariableInitialized_120" + input: "report_uninitialized_variables/IsVariableInitialized_121" + input: "report_uninitialized_variables/IsVariableInitialized_122" + input: "report_uninitialized_variables/IsVariableInitialized_123" + input: "report_uninitialized_variables/IsVariableInitialized_124" + input: "report_uninitialized_variables/IsVariableInitialized_125" + input: "report_uninitialized_variables/IsVariableInitialized_126" + input: "report_uninitialized_variables/IsVariableInitialized_127" + input: "report_uninitialized_variables/IsVariableInitialized_128" + input: "report_uninitialized_variables/IsVariableInitialized_129" + input: "report_uninitialized_variables/IsVariableInitialized_130" + input: "report_uninitialized_variables/IsVariableInitialized_131" + input: "report_uninitialized_variables/IsVariableInitialized_132" + input: "report_uninitialized_variables/IsVariableInitialized_133" + input: "report_uninitialized_variables/IsVariableInitialized_134" + input: "report_uninitialized_variables/IsVariableInitialized_135" + input: "report_uninitialized_variables/IsVariableInitialized_136" + input: "report_uninitialized_variables/IsVariableInitialized_137" + input: "report_uninitialized_variables/IsVariableInitialized_138" + input: "report_uninitialized_variables/IsVariableInitialized_139" + input: "report_uninitialized_variables/IsVariableInitialized_140" + input: "report_uninitialized_variables/IsVariableInitialized_141" + input: "report_uninitialized_variables/IsVariableInitialized_142" + input: "report_uninitialized_variables/IsVariableInitialized_143" + input: "report_uninitialized_variables/IsVariableInitialized_144" + input: "report_uninitialized_variables/IsVariableInitialized_145" + input: "report_uninitialized_variables/IsVariableInitialized_146" + input: "report_uninitialized_variables/IsVariableInitialized_147" + input: "report_uninitialized_variables/IsVariableInitialized_148" + input: "report_uninitialized_variables/IsVariableInitialized_149" + input: "report_uninitialized_variables/IsVariableInitialized_150" + input: "report_uninitialized_variables/IsVariableInitialized_151" + input: "report_uninitialized_variables/IsVariableInitialized_152" + input: "report_uninitialized_variables/IsVariableInitialized_153" + input: "report_uninitialized_variables/IsVariableInitialized_154" + input: "report_uninitialized_variables/IsVariableInitialized_155" + input: "report_uninitialized_variables/IsVariableInitialized_156" + input: "report_uninitialized_variables/IsVariableInitialized_157" + input: "report_uninitialized_variables/IsVariableInitialized_158" + input: "report_uninitialized_variables/IsVariableInitialized_159" + input: "report_uninitialized_variables/IsVariableInitialized_160" + input: "report_uninitialized_variables/IsVariableInitialized_161" + input: "report_uninitialized_variables/IsVariableInitialized_162" + input: "report_uninitialized_variables/IsVariableInitialized_163" + input: "report_uninitialized_variables/IsVariableInitialized_164" + input: "report_uninitialized_variables/IsVariableInitialized_165" + input: "report_uninitialized_variables/IsVariableInitialized_166" + input: "report_uninitialized_variables/IsVariableInitialized_167" + input: "report_uninitialized_variables/IsVariableInitialized_168" + input: "report_uninitialized_variables/IsVariableInitialized_169" + input: "report_uninitialized_variables/IsVariableInitialized_170" + input: "report_uninitialized_variables/IsVariableInitialized_171" + input: "report_uninitialized_variables/IsVariableInitialized_172" + input: "report_uninitialized_variables/IsVariableInitialized_173" + input: "report_uninitialized_variables/IsVariableInitialized_174" + input: "report_uninitialized_variables/IsVariableInitialized_175" + input: "report_uninitialized_variables/IsVariableInitialized_176" + input: "report_uninitialized_variables/IsVariableInitialized_177" + input: "report_uninitialized_variables/IsVariableInitialized_178" + input: "report_uninitialized_variables/IsVariableInitialized_179" + input: "report_uninitialized_variables/IsVariableInitialized_180" + input: "report_uninitialized_variables/IsVariableInitialized_181" + input: "report_uninitialized_variables/IsVariableInitialized_182" + input: "report_uninitialized_variables/IsVariableInitialized_183" + input: "report_uninitialized_variables/IsVariableInitialized_184" + input: "report_uninitialized_variables/IsVariableInitialized_185" + input: "report_uninitialized_variables/IsVariableInitialized_186" + input: "report_uninitialized_variables/IsVariableInitialized_187" + input: "report_uninitialized_variables/IsVariableInitialized_188" + input: "report_uninitialized_variables/IsVariableInitialized_189" + input: "report_uninitialized_variables/IsVariableInitialized_190" + input: "report_uninitialized_variables/IsVariableInitialized_191" + input: "report_uninitialized_variables/IsVariableInitialized_192" + input: "report_uninitialized_variables/IsVariableInitialized_193" + input: "report_uninitialized_variables/IsVariableInitialized_194" + input: "report_uninitialized_variables/IsVariableInitialized_195" + input: "report_uninitialized_variables/IsVariableInitialized_196" + input: "report_uninitialized_variables/IsVariableInitialized_197" + input: "report_uninitialized_variables/IsVariableInitialized_198" + input: "report_uninitialized_variables/IsVariableInitialized_199" + input: "report_uninitialized_variables/IsVariableInitialized_200" + input: "report_uninitialized_variables/IsVariableInitialized_201" + input: "report_uninitialized_variables/IsVariableInitialized_202" + input: "report_uninitialized_variables/IsVariableInitialized_203" + input: "report_uninitialized_variables/IsVariableInitialized_204" + input: "report_uninitialized_variables/IsVariableInitialized_205" + input: "report_uninitialized_variables/IsVariableInitialized_206" + input: "report_uninitialized_variables/IsVariableInitialized_207" + input: "report_uninitialized_variables/IsVariableInitialized_208" + input: "report_uninitialized_variables/IsVariableInitialized_209" + input: "report_uninitialized_variables/IsVariableInitialized_210" + input: "report_uninitialized_variables/IsVariableInitialized_211" + input: "report_uninitialized_variables/IsVariableInitialized_212" + input: "report_uninitialized_variables/IsVariableInitialized_213" + input: "report_uninitialized_variables/IsVariableInitialized_214" + input: "report_uninitialized_variables/IsVariableInitialized_215" + input: "report_uninitialized_variables/IsVariableInitialized_216" + input: "report_uninitialized_variables/IsVariableInitialized_217" + input: "report_uninitialized_variables/IsVariableInitialized_218" + input: "report_uninitialized_variables/IsVariableInitialized_219" + input: "report_uninitialized_variables/IsVariableInitialized_220" + input: "report_uninitialized_variables/IsVariableInitialized_221" + input: "report_uninitialized_variables/IsVariableInitialized_222" + input: "report_uninitialized_variables/IsVariableInitialized_223" + input: "report_uninitialized_variables/IsVariableInitialized_224" + input: "report_uninitialized_variables/IsVariableInitialized_225" + input: "report_uninitialized_variables/IsVariableInitialized_226" + input: "report_uninitialized_variables/IsVariableInitialized_227" + input: "report_uninitialized_variables/IsVariableInitialized_228" + input: "report_uninitialized_variables/IsVariableInitialized_229" + input: "report_uninitialized_variables/IsVariableInitialized_230" + input: "report_uninitialized_variables/IsVariableInitialized_231" + input: "report_uninitialized_variables/IsVariableInitialized_232" + input: "report_uninitialized_variables/IsVariableInitialized_233" + input: "report_uninitialized_variables/IsVariableInitialized_234" + input: "report_uninitialized_variables/IsVariableInitialized_235" + input: "report_uninitialized_variables/IsVariableInitialized_236" + input: "report_uninitialized_variables/IsVariableInitialized_237" + input: "report_uninitialized_variables/IsVariableInitialized_238" + input: "report_uninitialized_variables/IsVariableInitialized_239" + input: "report_uninitialized_variables/IsVariableInitialized_240" + input: "report_uninitialized_variables/IsVariableInitialized_241" + input: "report_uninitialized_variables/IsVariableInitialized_242" + input: "report_uninitialized_variables/IsVariableInitialized_243" + input: "report_uninitialized_variables/IsVariableInitialized_244" + input: "report_uninitialized_variables/IsVariableInitialized_245" + input: "report_uninitialized_variables/IsVariableInitialized_246" + input: "report_uninitialized_variables/IsVariableInitialized_247" + input: "report_uninitialized_variables/IsVariableInitialized_248" + input: "report_uninitialized_variables/IsVariableInitialized_249" + input: "report_uninitialized_variables/IsVariableInitialized_250" + input: "report_uninitialized_variables/IsVariableInitialized_251" + input: "report_uninitialized_variables/IsVariableInitialized_252" + input: "report_uninitialized_variables/IsVariableInitialized_253" + input: "report_uninitialized_variables/IsVariableInitialized_254" + input: "report_uninitialized_variables/IsVariableInitialized_255" + input: "report_uninitialized_variables/IsVariableInitialized_256" + input: "report_uninitialized_variables/IsVariableInitialized_257" + input: "report_uninitialized_variables/IsVariableInitialized_258" + input: "report_uninitialized_variables/IsVariableInitialized_259" + input: "report_uninitialized_variables/IsVariableInitialized_260" + input: "report_uninitialized_variables/IsVariableInitialized_261" + input: "report_uninitialized_variables/IsVariableInitialized_262" + input: "report_uninitialized_variables/IsVariableInitialized_263" + input: "report_uninitialized_variables/IsVariableInitialized_264" + input: "report_uninitialized_variables/IsVariableInitialized_265" + input: "report_uninitialized_variables/IsVariableInitialized_266" + input: "report_uninitialized_variables/IsVariableInitialized_267" + input: "report_uninitialized_variables/IsVariableInitialized_268" + input: "report_uninitialized_variables/IsVariableInitialized_269" + input: "report_uninitialized_variables/IsVariableInitialized_270" + input: "report_uninitialized_variables/IsVariableInitialized_271" + input: "report_uninitialized_variables/IsVariableInitialized_272" + input: "report_uninitialized_variables/IsVariableInitialized_273" + input: "report_uninitialized_variables/IsVariableInitialized_274" + input: "report_uninitialized_variables/IsVariableInitialized_275" + input: "report_uninitialized_variables/IsVariableInitialized_276" + input: "report_uninitialized_variables/IsVariableInitialized_277" + input: "report_uninitialized_variables/IsVariableInitialized_278" + input: "report_uninitialized_variables/IsVariableInitialized_279" + input: "report_uninitialized_variables/IsVariableInitialized_280" + input: "report_uninitialized_variables/IsVariableInitialized_281" + input: "report_uninitialized_variables/IsVariableInitialized_282" + input: "report_uninitialized_variables/IsVariableInitialized_283" + input: "report_uninitialized_variables/IsVariableInitialized_284" + input: "report_uninitialized_variables/IsVariableInitialized_285" + input: "report_uninitialized_variables/IsVariableInitialized_286" + input: "report_uninitialized_variables/IsVariableInitialized_287" + input: "report_uninitialized_variables/IsVariableInitialized_288" + input: "report_uninitialized_variables/IsVariableInitialized_289" + input: "report_uninitialized_variables/IsVariableInitialized_290" + input: "report_uninitialized_variables/IsVariableInitialized_291" + input: "report_uninitialized_variables/IsVariableInitialized_292" + input: "report_uninitialized_variables/IsVariableInitialized_293" + input: "report_uninitialized_variables/IsVariableInitialized_294" + input: "report_uninitialized_variables/IsVariableInitialized_295" + input: "report_uninitialized_variables/IsVariableInitialized_296" + input: "report_uninitialized_variables/IsVariableInitialized_297" + input: "report_uninitialized_variables/IsVariableInitialized_298" + input: "report_uninitialized_variables/IsVariableInitialized_299" + input: "report_uninitialized_variables/IsVariableInitialized_300" + input: "report_uninitialized_variables/IsVariableInitialized_301" + input: "report_uninitialized_variables/IsVariableInitialized_302" + input: "report_uninitialized_variables/IsVariableInitialized_303" + input: "report_uninitialized_variables/IsVariableInitialized_304" + input: "report_uninitialized_variables/IsVariableInitialized_305" + input: "report_uninitialized_variables/IsVariableInitialized_306" + input: "report_uninitialized_variables/IsVariableInitialized_307" + input: "report_uninitialized_variables/IsVariableInitialized_308" + input: "report_uninitialized_variables/IsVariableInitialized_309" + input: "report_uninitialized_variables/IsVariableInitialized_310" + input: "report_uninitialized_variables/IsVariableInitialized_311" + input: "report_uninitialized_variables/IsVariableInitialized_312" + input: "report_uninitialized_variables/IsVariableInitialized_313" + input: "report_uninitialized_variables/IsVariableInitialized_314" + input: "report_uninitialized_variables/IsVariableInitialized_315" + input: "report_uninitialized_variables/IsVariableInitialized_316" + input: "report_uninitialized_variables/IsVariableInitialized_317" + input: "report_uninitialized_variables/IsVariableInitialized_318" + input: "report_uninitialized_variables/IsVariableInitialized_319" + input: "report_uninitialized_variables/IsVariableInitialized_320" + input: "report_uninitialized_variables/IsVariableInitialized_321" + input: "report_uninitialized_variables/IsVariableInitialized_322" + input: "report_uninitialized_variables/IsVariableInitialized_323" + input: "report_uninitialized_variables/IsVariableInitialized_324" + input: "report_uninitialized_variables/IsVariableInitialized_325" + input: "report_uninitialized_variables/IsVariableInitialized_326" + input: "report_uninitialized_variables/IsVariableInitialized_327" + input: "report_uninitialized_variables/IsVariableInitialized_328" + input: "report_uninitialized_variables/IsVariableInitialized_329" + input: "report_uninitialized_variables/IsVariableInitialized_330" + input: "report_uninitialized_variables/IsVariableInitialized_331" + input: "report_uninitialized_variables/IsVariableInitialized_332" + input: "report_uninitialized_variables/IsVariableInitialized_333" + input: "report_uninitialized_variables/IsVariableInitialized_334" + input: "report_uninitialized_variables/IsVariableInitialized_335" + input: "report_uninitialized_variables/IsVariableInitialized_336" + input: "report_uninitialized_variables/IsVariableInitialized_337" + input: "report_uninitialized_variables/IsVariableInitialized_338" + input: "report_uninitialized_variables/IsVariableInitialized_339" + input: "report_uninitialized_variables/IsVariableInitialized_340" + input: "report_uninitialized_variables/IsVariableInitialized_341" + input: "report_uninitialized_variables/IsVariableInitialized_342" + input: "report_uninitialized_variables/IsVariableInitialized_343" + input: "report_uninitialized_variables/IsVariableInitialized_344" + input: "report_uninitialized_variables/IsVariableInitialized_345" + input: "report_uninitialized_variables/IsVariableInitialized_346" + input: "report_uninitialized_variables/IsVariableInitialized_347" + input: "report_uninitialized_variables/IsVariableInitialized_348" + input: "report_uninitialized_variables/IsVariableInitialized_349" + input: "report_uninitialized_variables/IsVariableInitialized_350" + input: "report_uninitialized_variables/IsVariableInitialized_351" + input: "report_uninitialized_variables/IsVariableInitialized_352" + input: "report_uninitialized_variables/IsVariableInitialized_353" + input: "report_uninitialized_variables/IsVariableInitialized_354" + input: "report_uninitialized_variables/IsVariableInitialized_355" + input: "report_uninitialized_variables/IsVariableInitialized_356" + input: "report_uninitialized_variables/IsVariableInitialized_357" + input: "report_uninitialized_variables/IsVariableInitialized_358" + input: "report_uninitialized_variables/IsVariableInitialized_359" + input: "report_uninitialized_variables/IsVariableInitialized_360" + input: "report_uninitialized_variables/IsVariableInitialized_361" + input: "report_uninitialized_variables/IsVariableInitialized_362" + input: "report_uninitialized_variables/IsVariableInitialized_363" + input: "report_uninitialized_variables/IsVariableInitialized_364" + input: "report_uninitialized_variables/IsVariableInitialized_365" + input: "report_uninitialized_variables/IsVariableInitialized_366" + input: "report_uninitialized_variables/IsVariableInitialized_367" + input: "report_uninitialized_variables/IsVariableInitialized_368" + input: "report_uninitialized_variables/IsVariableInitialized_369" + input: "report_uninitialized_variables/IsVariableInitialized_370" + input: "report_uninitialized_variables/IsVariableInitialized_371" + input: "report_uninitialized_variables/IsVariableInitialized_372" + input: "report_uninitialized_variables/IsVariableInitialized_373" + input: "report_uninitialized_variables/IsVariableInitialized_374" + input: "report_uninitialized_variables/IsVariableInitialized_375" + input: "report_uninitialized_variables/IsVariableInitialized_376" + input: "report_uninitialized_variables/IsVariableInitialized_377" + input: "report_uninitialized_variables/IsVariableInitialized_378" + input: "report_uninitialized_variables/IsVariableInitialized_379" + input: "report_uninitialized_variables/IsVariableInitialized_380" + input: "report_uninitialized_variables/IsVariableInitialized_381" + input: "report_uninitialized_variables/IsVariableInitialized_382" + input: "report_uninitialized_variables/IsVariableInitialized_383" + input: "report_uninitialized_variables/IsVariableInitialized_384" + input: "report_uninitialized_variables/IsVariableInitialized_385" + input: "report_uninitialized_variables/IsVariableInitialized_386" + input: "report_uninitialized_variables/IsVariableInitialized_387" + input: "report_uninitialized_variables/IsVariableInitialized_388" + input: "report_uninitialized_variables/IsVariableInitialized_389" + input: "report_uninitialized_variables/IsVariableInitialized_390" + input: "report_uninitialized_variables/IsVariableInitialized_391" + input: "report_uninitialized_variables/IsVariableInitialized_392" + input: "report_uninitialized_variables/IsVariableInitialized_393" + input: "report_uninitialized_variables/IsVariableInitialized_394" + input: "report_uninitialized_variables/IsVariableInitialized_395" + input: "report_uninitialized_variables/IsVariableInitialized_396" + input: "report_uninitialized_variables/IsVariableInitialized_397" + input: "report_uninitialized_variables/IsVariableInitialized_398" + input: "report_uninitialized_variables/IsVariableInitialized_399" + input: "report_uninitialized_variables/IsVariableInitialized_400" + input: "report_uninitialized_variables/IsVariableInitialized_401" + input: "report_uninitialized_variables/IsVariableInitialized_402" + input: "report_uninitialized_variables/IsVariableInitialized_403" + input: "report_uninitialized_variables/IsVariableInitialized_404" + input: "report_uninitialized_variables/IsVariableInitialized_405" + input: "report_uninitialized_variables/IsVariableInitialized_406" + input: "report_uninitialized_variables/IsVariableInitialized_407" + input: "report_uninitialized_variables/IsVariableInitialized_408" + input: "report_uninitialized_variables/IsVariableInitialized_409" + input: "report_uninitialized_variables/IsVariableInitialized_410" + input: "report_uninitialized_variables/IsVariableInitialized_411" + input: "report_uninitialized_variables/IsVariableInitialized_412" + input: "report_uninitialized_variables/IsVariableInitialized_413" + input: "report_uninitialized_variables/IsVariableInitialized_414" + input: "report_uninitialized_variables/IsVariableInitialized_415" + input: "report_uninitialized_variables/IsVariableInitialized_416" + input: "report_uninitialized_variables/IsVariableInitialized_417" + input: "report_uninitialized_variables/IsVariableInitialized_418" + input: "report_uninitialized_variables/IsVariableInitialized_419" + input: "report_uninitialized_variables/IsVariableInitialized_420" + input: "report_uninitialized_variables/IsVariableInitialized_421" + input: "report_uninitialized_variables/IsVariableInitialized_422" + input: "report_uninitialized_variables/IsVariableInitialized_423" + input: "report_uninitialized_variables/IsVariableInitialized_424" + input: "report_uninitialized_variables/IsVariableInitialized_425" + input: "report_uninitialized_variables/IsVariableInitialized_426" + input: "report_uninitialized_variables/IsVariableInitialized_427" + input: "report_uninitialized_variables/IsVariableInitialized_428" + input: "report_uninitialized_variables/IsVariableInitialized_429" + input: "report_uninitialized_variables/IsVariableInitialized_430" + input: "report_uninitialized_variables/IsVariableInitialized_431" + input: "report_uninitialized_variables/IsVariableInitialized_432" + input: "report_uninitialized_variables/IsVariableInitialized_433" + input: "report_uninitialized_variables/IsVariableInitialized_434" + input: "report_uninitialized_variables/IsVariableInitialized_435" + input: "report_uninitialized_variables/IsVariableInitialized_436" + input: "report_uninitialized_variables/IsVariableInitialized_437" + input: "report_uninitialized_variables/IsVariableInitialized_438" + input: "report_uninitialized_variables/IsVariableInitialized_439" + input: "report_uninitialized_variables/IsVariableInitialized_440" + input: "report_uninitialized_variables/IsVariableInitialized_441" + input: "report_uninitialized_variables/IsVariableInitialized_442" + input: "report_uninitialized_variables/IsVariableInitialized_443" + input: "report_uninitialized_variables/IsVariableInitialized_444" + input: "report_uninitialized_variables/IsVariableInitialized_445" + input: "report_uninitialized_variables/IsVariableInitialized_446" + input: "report_uninitialized_variables/IsVariableInitialized_447" + input: "report_uninitialized_variables/IsVariableInitialized_448" + input: "report_uninitialized_variables/IsVariableInitialized_449" + input: "report_uninitialized_variables/IsVariableInitialized_450" + input: "report_uninitialized_variables/IsVariableInitialized_451" + input: "report_uninitialized_variables/IsVariableInitialized_452" + input: "report_uninitialized_variables/IsVariableInitialized_453" + input: "report_uninitialized_variables/IsVariableInitialized_454" + input: "report_uninitialized_variables/IsVariableInitialized_455" + input: "report_uninitialized_variables/IsVariableInitialized_456" + input: "report_uninitialized_variables/IsVariableInitialized_457" + input: "report_uninitialized_variables/IsVariableInitialized_458" + input: "report_uninitialized_variables/IsVariableInitialized_459" + input: "report_uninitialized_variables/IsVariableInitialized_460" + input: "report_uninitialized_variables/IsVariableInitialized_461" + input: "report_uninitialized_variables/IsVariableInitialized_462" + input: "report_uninitialized_variables/IsVariableInitialized_463" + input: "report_uninitialized_variables/IsVariableInitialized_464" + input: "report_uninitialized_variables/IsVariableInitialized_465" + input: "report_uninitialized_variables/IsVariableInitialized_466" + input: "report_uninitialized_variables/IsVariableInitialized_467" + input: "report_uninitialized_variables/IsVariableInitialized_468" + input: "report_uninitialized_variables/IsVariableInitialized_469" + input: "report_uninitialized_variables/IsVariableInitialized_470" + input: "report_uninitialized_variables/IsVariableInitialized_471" + input: "report_uninitialized_variables/IsVariableInitialized_472" + input: "report_uninitialized_variables/IsVariableInitialized_473" + input: "report_uninitialized_variables/IsVariableInitialized_474" + input: "report_uninitialized_variables/IsVariableInitialized_475" + input: "report_uninitialized_variables/IsVariableInitialized_476" + input: "report_uninitialized_variables/IsVariableInitialized_477" + input: "report_uninitialized_variables/IsVariableInitialized_478" + input: "report_uninitialized_variables/IsVariableInitialized_479" + input: "report_uninitialized_variables/IsVariableInitialized_480" + input: "report_uninitialized_variables/IsVariableInitialized_481" + input: "report_uninitialized_variables/IsVariableInitialized_482" + input: "report_uninitialized_variables/IsVariableInitialized_483" + input: "report_uninitialized_variables/IsVariableInitialized_484" + input: "report_uninitialized_variables/IsVariableInitialized_485" + input: "report_uninitialized_variables/IsVariableInitialized_486" + input: "report_uninitialized_variables/IsVariableInitialized_487" + input: "report_uninitialized_variables/IsVariableInitialized_488" + input: "report_uninitialized_variables/IsVariableInitialized_489" + input: "report_uninitialized_variables/IsVariableInitialized_490" + input: "report_uninitialized_variables/IsVariableInitialized_491" + input: "report_uninitialized_variables/IsVariableInitialized_492" + input: "report_uninitialized_variables/IsVariableInitialized_493" + input: "report_uninitialized_variables/IsVariableInitialized_494" + input: "report_uninitialized_variables/IsVariableInitialized_495" + input: "report_uninitialized_variables/IsVariableInitialized_496" + input: "report_uninitialized_variables/IsVariableInitialized_497" + input: "report_uninitialized_variables/IsVariableInitialized_498" + input: "report_uninitialized_variables/IsVariableInitialized_499" + input: "report_uninitialized_variables/IsVariableInitialized_500" + input: "report_uninitialized_variables/IsVariableInitialized_501" + input: "report_uninitialized_variables/IsVariableInitialized_502" + input: "report_uninitialized_variables/IsVariableInitialized_503" + input: "report_uninitialized_variables/IsVariableInitialized_504" + input: "report_uninitialized_variables/IsVariableInitialized_505" + input: "report_uninitialized_variables/IsVariableInitialized_506" + input: "report_uninitialized_variables/IsVariableInitialized_507" + input: "report_uninitialized_variables/IsVariableInitialized_508" + input: "report_uninitialized_variables/IsVariableInitialized_509" + input: "report_uninitialized_variables/IsVariableInitialized_510" + input: "report_uninitialized_variables/IsVariableInitialized_511" + input: "report_uninitialized_variables/IsVariableInitialized_512" + input: "report_uninitialized_variables/IsVariableInitialized_513" + input: "report_uninitialized_variables/IsVariableInitialized_514" + input: "report_uninitialized_variables/IsVariableInitialized_515" + input: "report_uninitialized_variables/IsVariableInitialized_516" + input: "report_uninitialized_variables/IsVariableInitialized_517" + input: "report_uninitialized_variables/IsVariableInitialized_518" + input: "report_uninitialized_variables/IsVariableInitialized_519" + input: "report_uninitialized_variables/IsVariableInitialized_520" + input: "report_uninitialized_variables/IsVariableInitialized_521" + input: "report_uninitialized_variables/IsVariableInitialized_522" + input: "report_uninitialized_variables/IsVariableInitialized_523" + input: "report_uninitialized_variables/IsVariableInitialized_524" + input: "report_uninitialized_variables/IsVariableInitialized_525" + input: "report_uninitialized_variables/IsVariableInitialized_526" + input: "report_uninitialized_variables/IsVariableInitialized_527" + input: "report_uninitialized_variables/IsVariableInitialized_528" + input: "report_uninitialized_variables/IsVariableInitialized_529" + input: "report_uninitialized_variables/IsVariableInitialized_530" + input: "report_uninitialized_variables/IsVariableInitialized_531" + input: "report_uninitialized_variables/IsVariableInitialized_532" + input: "report_uninitialized_variables/IsVariableInitialized_533" + input: "report_uninitialized_variables/IsVariableInitialized_534" + input: "report_uninitialized_variables/IsVariableInitialized_535" + input: "report_uninitialized_variables/IsVariableInitialized_536" + input: "report_uninitialized_variables/IsVariableInitialized_537" + input: "report_uninitialized_variables/IsVariableInitialized_538" + input: "report_uninitialized_variables/IsVariableInitialized_539" + input: "report_uninitialized_variables/IsVariableInitialized_540" + input: "report_uninitialized_variables/IsVariableInitialized_541" + input: "report_uninitialized_variables/IsVariableInitialized_542" + input: "report_uninitialized_variables/IsVariableInitialized_543" + input: "report_uninitialized_variables/IsVariableInitialized_544" + input: "report_uninitialized_variables/IsVariableInitialized_545" + input: "report_uninitialized_variables/IsVariableInitialized_546" + input: "report_uninitialized_variables/IsVariableInitialized_547" + input: "report_uninitialized_variables/IsVariableInitialized_548" + input: "report_uninitialized_variables/IsVariableInitialized_549" + input: "report_uninitialized_variables/IsVariableInitialized_550" + input: "report_uninitialized_variables/IsVariableInitialized_551" + input: "report_uninitialized_variables/IsVariableInitialized_552" + input: "report_uninitialized_variables/IsVariableInitialized_553" + input: "report_uninitialized_variables/IsVariableInitialized_554" + input: "report_uninitialized_variables/IsVariableInitialized_555" + input: "report_uninitialized_variables/IsVariableInitialized_556" + input: "report_uninitialized_variables/IsVariableInitialized_557" + input: "report_uninitialized_variables/IsVariableInitialized_558" + input: "report_uninitialized_variables/IsVariableInitialized_559" + input: "report_uninitialized_variables/IsVariableInitialized_560" + input: "report_uninitialized_variables/IsVariableInitialized_561" + input: "report_uninitialized_variables/IsVariableInitialized_562" + input: "report_uninitialized_variables/IsVariableInitialized_563" + input: "report_uninitialized_variables/IsVariableInitialized_564" + input: "report_uninitialized_variables/IsVariableInitialized_565" + input: "report_uninitialized_variables/IsVariableInitialized_566" + input: "report_uninitialized_variables/IsVariableInitialized_567" + input: "report_uninitialized_variables/IsVariableInitialized_568" + input: "report_uninitialized_variables/IsVariableInitialized_569" + input: "report_uninitialized_variables/IsVariableInitialized_570" + input: "report_uninitialized_variables/IsVariableInitialized_571" + input: "report_uninitialized_variables/IsVariableInitialized_572" + input: "report_uninitialized_variables/IsVariableInitialized_573" + input: "report_uninitialized_variables/IsVariableInitialized_574" + input: "report_uninitialized_variables/IsVariableInitialized_575" + input: "report_uninitialized_variables/IsVariableInitialized_576" + input: "report_uninitialized_variables/IsVariableInitialized_577" + input: "report_uninitialized_variables/IsVariableInitialized_578" + input: "report_uninitialized_variables/IsVariableInitialized_579" + input: "report_uninitialized_variables/IsVariableInitialized_580" + input: "report_uninitialized_variables/IsVariableInitialized_581" + input: "report_uninitialized_variables/IsVariableInitialized_582" + input: "report_uninitialized_variables/IsVariableInitialized_583" + input: "report_uninitialized_variables/IsVariableInitialized_584" + input: "report_uninitialized_variables/IsVariableInitialized_585" + input: "report_uninitialized_variables/IsVariableInitialized_586" + input: "report_uninitialized_variables/IsVariableInitialized_587" + input: "report_uninitialized_variables/IsVariableInitialized_588" + input: "report_uninitialized_variables/IsVariableInitialized_589" + input: "report_uninitialized_variables/IsVariableInitialized_590" + input: "report_uninitialized_variables/IsVariableInitialized_591" + input: "report_uninitialized_variables/IsVariableInitialized_592" + input: "report_uninitialized_variables/IsVariableInitialized_593" + input: "report_uninitialized_variables/IsVariableInitialized_594" + input: "report_uninitialized_variables/IsVariableInitialized_595" + input: "report_uninitialized_variables/IsVariableInitialized_596" + input: "report_uninitialized_variables/IsVariableInitialized_597" + input: "report_uninitialized_variables/IsVariableInitialized_598" + input: "report_uninitialized_variables/IsVariableInitialized_599" + input: "report_uninitialized_variables/IsVariableInitialized_600" + input: "report_uninitialized_variables/IsVariableInitialized_601" + input: "report_uninitialized_variables/IsVariableInitialized_602" + input: "report_uninitialized_variables/IsVariableInitialized_603" + input: "report_uninitialized_variables/IsVariableInitialized_604" + input: "report_uninitialized_variables/IsVariableInitialized_605" + input: "report_uninitialized_variables/IsVariableInitialized_606" + input: "report_uninitialized_variables/IsVariableInitialized_607" + input: "report_uninitialized_variables/IsVariableInitialized_608" + input: "report_uninitialized_variables/IsVariableInitialized_609" + input: "report_uninitialized_variables/IsVariableInitialized_610" + input: "report_uninitialized_variables/IsVariableInitialized_611" + input: "report_uninitialized_variables/IsVariableInitialized_612" + input: "report_uninitialized_variables/IsVariableInitialized_613" + input: "report_uninitialized_variables/IsVariableInitialized_614" + input: "report_uninitialized_variables/IsVariableInitialized_615" + input: "report_uninitialized_variables/IsVariableInitialized_616" + input: "report_uninitialized_variables/IsVariableInitialized_617" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 619 + } + } + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/LogicalNot" + op: "LogicalNot" + input: "report_uninitialized_variables/stack" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 619 + } + } + string_val: "global_step" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: "bert/encoder/layer_2/attention/self/query/bias" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: "bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/bias" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/output_bias" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_bias" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 619 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Prod" + op: "Prod" + input: "report_uninitialized_variables/boolean_mask/strided_slice" + input: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 619 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 619 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape_2" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat/values_1" + op: "Pack" + input: "report_uninitialized_variables/boolean_mask/Prod" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat" + op: "ConcatV2" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1" + input: "report_uninitialized_variables/boolean_mask/concat/values_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2" + input: "report_uninitialized_variables/boolean_mask/concat/axis" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape" + op: "Reshape" + input: "report_uninitialized_variables/Const" + input: "report_uninitialized_variables/boolean_mask/concat" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape_1/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape_1" + op: "Reshape" + input: "report_uninitialized_variables/LogicalNot" + input: "report_uninitialized_variables/boolean_mask/Reshape_1/shape" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Where" + op: "Where" + input: "report_uninitialized_variables/boolean_mask/Reshape_1" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Squeeze" + op: "Squeeze" + input: "report_uninitialized_variables/boolean_mask/Where" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/GatherV2/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/GatherV2" + op: "GatherV2" + input: "report_uninitialized_variables/boolean_mask/Reshape" + input: "report_uninitialized_variables/boolean_mask/Squeeze" + input: "report_uninitialized_variables/boolean_mask/GatherV2/axis" + device: "/device:CPU:0" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT64 + } + } + attr { + key: "Tparams" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "report_uninitialized_resources/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "concat" + op: "ConcatV2" + input: "report_uninitialized_variables/boolean_mask/GatherV2" + input: "report_uninitialized_resources/Const" + input: "concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_1" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_2" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_3" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_4" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_5" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_6" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_7" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_8" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_9" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_10" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_11" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_12" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_13" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_14" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_15" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_16" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_17" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_18" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_19" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_20" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_21" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_22" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_23" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_24" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_25" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_26" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_27" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_28" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_29" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_30" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_31" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_32" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_33" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_34" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_35" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_36" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_37" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_38" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_39" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_40" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_41" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_42" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_43" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_44" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_45" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_46" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_47" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_48" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_49" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_50" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_51" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_52" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_53" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_54" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_55" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_56" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_57" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_58" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_59" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_60" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_61" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_62" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_63" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_64" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_65" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_66" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_67" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_68" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_69" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_70" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_71" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_72" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_73" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_74" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_75" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_76" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_77" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_78" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_79" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_80" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_81" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_82" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_83" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_84" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_85" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_86" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_87" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_88" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_89" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_90" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_91" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_92" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_93" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_94" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_95" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_96" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_97" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_98" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_99" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_100" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_101" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_102" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_103" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_104" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_105" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_106" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_107" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_108" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_109" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_110" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_111" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_112" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_113" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_114" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_115" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_116" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_117" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_118" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_119" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_120" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_121" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_122" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_123" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_124" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_125" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_126" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_127" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_128" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_129" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_130" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_131" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_132" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_133" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_134" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_135" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_136" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_137" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_138" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_139" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_140" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_141" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_142" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_143" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_144" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_145" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_146" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_147" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_148" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_149" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_150" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_151" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_152" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_153" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_154" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_155" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_156" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_157" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_158" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_159" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_160" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_161" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_162" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_163" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_164" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_165" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_166" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_167" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_168" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_169" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_170" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_171" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_172" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_173" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_174" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_175" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_176" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_177" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_178" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_179" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_180" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_181" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_182" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_183" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_184" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_185" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_186" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_187" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_188" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_189" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_190" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_191" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_192" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_193" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_194" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_195" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_196" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_197" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_198" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_199" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_200" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_201" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_202" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_203" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_204" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_205" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_206" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_207" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_208" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_209" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_210" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_211" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_212" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_213" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_214" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_215" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_216" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_217" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_218" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_219" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_220" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_221" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_222" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_223" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_224" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_225" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_226" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_227" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_228" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_229" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_230" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_231" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_232" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_233" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_234" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_235" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_236" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_237" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_238" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_239" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_240" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_241" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_242" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_243" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_244" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_245" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_246" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_247" + op: "IsVariableInitialized" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_248" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_249" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_250" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_251" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_252" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_253" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_254" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_255" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_256" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_257" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_258" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_259" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_260" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_261" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_262" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_263" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_264" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_265" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_266" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_267" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_268" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_269" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_270" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_271" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_272" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_273" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_274" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_275" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_276" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_277" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_278" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_279" + op: "IsVariableInitialized" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_280" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_281" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_282" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_283" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_284" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_285" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_286" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_287" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_288" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_289" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_290" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_291" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_292" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_293" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_294" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_295" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_296" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_297" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_298" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_299" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_300" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_301" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_302" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_303" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_304" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_305" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_306" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_307" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_308" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_309" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_310" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_311" + op: "IsVariableInitialized" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_312" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_313" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_314" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_315" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_316" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_317" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_318" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_319" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_320" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_321" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_322" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_323" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_324" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_325" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_326" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_327" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_328" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_329" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_330" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_331" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_332" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_333" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_334" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_335" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_336" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_337" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_338" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_339" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_340" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_341" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_342" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_343" + op: "IsVariableInitialized" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_344" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_345" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_346" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_347" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_348" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_349" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_350" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_351" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_352" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_353" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_354" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_355" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_356" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_357" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_358" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_359" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_360" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_361" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_362" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_363" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_364" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_365" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_366" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_367" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_368" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_369" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_370" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_371" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_372" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_373" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_374" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_375" + op: "IsVariableInitialized" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_376" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_377" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_378" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_379" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_380" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_381" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_382" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_383" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_384" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_385" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_386" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_387" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_388" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_389" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_390" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_391" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_392" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_393" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_394" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_395" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_396" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_397" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_398" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_399" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_400" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_401" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_402" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_403" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_404" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_405" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_406" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_407" + op: "IsVariableInitialized" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_408" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_409" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_410" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_411" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_412" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_413" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_414" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_415" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_416" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_417" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_418" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_419" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_420" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_421" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_422" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_423" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_424" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_425" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_426" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_427" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_428" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_429" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_430" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_431" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_432" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_433" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_434" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_435" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_436" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_437" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_438" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_439" + op: "IsVariableInitialized" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_440" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_441" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_442" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_443" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_444" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_445" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_446" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_447" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_448" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_449" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_450" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_451" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_452" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_453" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_454" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_455" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_456" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_457" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_458" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_459" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_460" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_461" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_462" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_463" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_464" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_465" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_466" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_467" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_468" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_469" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_470" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_471" + op: "IsVariableInitialized" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_472" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_473" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_474" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_475" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_476" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_477" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_478" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_479" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_480" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_481" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_482" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_483" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_484" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_485" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_486" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_487" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_488" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_489" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_490" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_491" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_492" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_493" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_494" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_495" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_496" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_497" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_498" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_499" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_500" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_501" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_502" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_503" + op: "IsVariableInitialized" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_504" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_505" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_506" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_507" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_508" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_509" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_510" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_511" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_512" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_513" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_514" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_515" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_516" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_517" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_518" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_519" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_520" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_521" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_522" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_523" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_524" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_525" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_526" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_527" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_528" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_529" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_530" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_531" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_532" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_533" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_534" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_535" + op: "IsVariableInitialized" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_536" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_537" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_538" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_539" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_540" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_541" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_542" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_543" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_544" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_545" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_546" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_547" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_548" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_549" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_550" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_551" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_552" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_553" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_554" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_555" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_556" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_557" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_558" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_559" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_560" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_561" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_562" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_563" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_564" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_565" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_566" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_567" + op: "IsVariableInitialized" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_568" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_569" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_570" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_571" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_572" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_573" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_574" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_575" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_576" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_577" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_578" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_579" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_580" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_581" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_582" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_583" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_584" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_585" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_586" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_587" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_588" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_589" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_590" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_591" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_592" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_593" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_594" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_595" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_596" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_597" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_598" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_599" + op: "IsVariableInitialized" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_600" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_601" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_602" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_603" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_604" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_605" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_606" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_607" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_608" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_609" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_610" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_611" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_612" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_613" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_614" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_615" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_616" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_617" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/stack" + op: "Pack" + input: "report_uninitialized_variables_1/VarIsInitializedOp" + input: "report_uninitialized_variables_1/IsVariableInitialized" + input: "report_uninitialized_variables_1/IsVariableInitialized_1" + input: "report_uninitialized_variables_1/IsVariableInitialized_2" + input: "report_uninitialized_variables_1/IsVariableInitialized_3" + input: "report_uninitialized_variables_1/IsVariableInitialized_4" + input: "report_uninitialized_variables_1/IsVariableInitialized_5" + input: "report_uninitialized_variables_1/IsVariableInitialized_6" + input: "report_uninitialized_variables_1/IsVariableInitialized_7" + input: "report_uninitialized_variables_1/IsVariableInitialized_8" + input: "report_uninitialized_variables_1/IsVariableInitialized_9" + input: "report_uninitialized_variables_1/IsVariableInitialized_10" + input: "report_uninitialized_variables_1/IsVariableInitialized_11" + input: "report_uninitialized_variables_1/IsVariableInitialized_12" + input: "report_uninitialized_variables_1/IsVariableInitialized_13" + input: "report_uninitialized_variables_1/IsVariableInitialized_14" + input: "report_uninitialized_variables_1/IsVariableInitialized_15" + input: "report_uninitialized_variables_1/IsVariableInitialized_16" + input: "report_uninitialized_variables_1/IsVariableInitialized_17" + input: "report_uninitialized_variables_1/IsVariableInitialized_18" + input: "report_uninitialized_variables_1/IsVariableInitialized_19" + input: "report_uninitialized_variables_1/IsVariableInitialized_20" + input: "report_uninitialized_variables_1/IsVariableInitialized_21" + input: "report_uninitialized_variables_1/IsVariableInitialized_22" + input: "report_uninitialized_variables_1/IsVariableInitialized_23" + input: "report_uninitialized_variables_1/IsVariableInitialized_24" + input: "report_uninitialized_variables_1/IsVariableInitialized_25" + input: "report_uninitialized_variables_1/IsVariableInitialized_26" + input: "report_uninitialized_variables_1/IsVariableInitialized_27" + input: "report_uninitialized_variables_1/IsVariableInitialized_28" + input: "report_uninitialized_variables_1/IsVariableInitialized_29" + input: "report_uninitialized_variables_1/IsVariableInitialized_30" + input: "report_uninitialized_variables_1/IsVariableInitialized_31" + input: "report_uninitialized_variables_1/IsVariableInitialized_32" + input: "report_uninitialized_variables_1/IsVariableInitialized_33" + input: "report_uninitialized_variables_1/IsVariableInitialized_34" + input: "report_uninitialized_variables_1/IsVariableInitialized_35" + input: "report_uninitialized_variables_1/IsVariableInitialized_36" + input: "report_uninitialized_variables_1/IsVariableInitialized_37" + input: "report_uninitialized_variables_1/IsVariableInitialized_38" + input: "report_uninitialized_variables_1/IsVariableInitialized_39" + input: "report_uninitialized_variables_1/IsVariableInitialized_40" + input: "report_uninitialized_variables_1/IsVariableInitialized_41" + input: "report_uninitialized_variables_1/IsVariableInitialized_42" + input: "report_uninitialized_variables_1/IsVariableInitialized_43" + input: "report_uninitialized_variables_1/IsVariableInitialized_44" + input: "report_uninitialized_variables_1/IsVariableInitialized_45" + input: "report_uninitialized_variables_1/IsVariableInitialized_46" + input: "report_uninitialized_variables_1/IsVariableInitialized_47" + input: "report_uninitialized_variables_1/IsVariableInitialized_48" + input: "report_uninitialized_variables_1/IsVariableInitialized_49" + input: "report_uninitialized_variables_1/IsVariableInitialized_50" + input: "report_uninitialized_variables_1/IsVariableInitialized_51" + input: "report_uninitialized_variables_1/IsVariableInitialized_52" + input: "report_uninitialized_variables_1/IsVariableInitialized_53" + input: "report_uninitialized_variables_1/IsVariableInitialized_54" + input: "report_uninitialized_variables_1/IsVariableInitialized_55" + input: "report_uninitialized_variables_1/IsVariableInitialized_56" + input: "report_uninitialized_variables_1/IsVariableInitialized_57" + input: "report_uninitialized_variables_1/IsVariableInitialized_58" + input: "report_uninitialized_variables_1/IsVariableInitialized_59" + input: "report_uninitialized_variables_1/IsVariableInitialized_60" + input: "report_uninitialized_variables_1/IsVariableInitialized_61" + input: "report_uninitialized_variables_1/IsVariableInitialized_62" + input: "report_uninitialized_variables_1/IsVariableInitialized_63" + input: "report_uninitialized_variables_1/IsVariableInitialized_64" + input: "report_uninitialized_variables_1/IsVariableInitialized_65" + input: "report_uninitialized_variables_1/IsVariableInitialized_66" + input: "report_uninitialized_variables_1/IsVariableInitialized_67" + input: "report_uninitialized_variables_1/IsVariableInitialized_68" + input: "report_uninitialized_variables_1/IsVariableInitialized_69" + input: "report_uninitialized_variables_1/IsVariableInitialized_70" + input: "report_uninitialized_variables_1/IsVariableInitialized_71" + input: "report_uninitialized_variables_1/IsVariableInitialized_72" + input: "report_uninitialized_variables_1/IsVariableInitialized_73" + input: "report_uninitialized_variables_1/IsVariableInitialized_74" + input: "report_uninitialized_variables_1/IsVariableInitialized_75" + input: "report_uninitialized_variables_1/IsVariableInitialized_76" + input: "report_uninitialized_variables_1/IsVariableInitialized_77" + input: "report_uninitialized_variables_1/IsVariableInitialized_78" + input: "report_uninitialized_variables_1/IsVariableInitialized_79" + input: "report_uninitialized_variables_1/IsVariableInitialized_80" + input: "report_uninitialized_variables_1/IsVariableInitialized_81" + input: "report_uninitialized_variables_1/IsVariableInitialized_82" + input: "report_uninitialized_variables_1/IsVariableInitialized_83" + input: "report_uninitialized_variables_1/IsVariableInitialized_84" + input: "report_uninitialized_variables_1/IsVariableInitialized_85" + input: "report_uninitialized_variables_1/IsVariableInitialized_86" + input: "report_uninitialized_variables_1/IsVariableInitialized_87" + input: "report_uninitialized_variables_1/IsVariableInitialized_88" + input: "report_uninitialized_variables_1/IsVariableInitialized_89" + input: "report_uninitialized_variables_1/IsVariableInitialized_90" + input: "report_uninitialized_variables_1/IsVariableInitialized_91" + input: "report_uninitialized_variables_1/IsVariableInitialized_92" + input: "report_uninitialized_variables_1/IsVariableInitialized_93" + input: "report_uninitialized_variables_1/IsVariableInitialized_94" + input: "report_uninitialized_variables_1/IsVariableInitialized_95" + input: "report_uninitialized_variables_1/IsVariableInitialized_96" + input: "report_uninitialized_variables_1/IsVariableInitialized_97" + input: "report_uninitialized_variables_1/IsVariableInitialized_98" + input: "report_uninitialized_variables_1/IsVariableInitialized_99" + input: "report_uninitialized_variables_1/IsVariableInitialized_100" + input: "report_uninitialized_variables_1/IsVariableInitialized_101" + input: "report_uninitialized_variables_1/IsVariableInitialized_102" + input: "report_uninitialized_variables_1/IsVariableInitialized_103" + input: "report_uninitialized_variables_1/IsVariableInitialized_104" + input: "report_uninitialized_variables_1/IsVariableInitialized_105" + input: "report_uninitialized_variables_1/IsVariableInitialized_106" + input: "report_uninitialized_variables_1/IsVariableInitialized_107" + input: "report_uninitialized_variables_1/IsVariableInitialized_108" + input: "report_uninitialized_variables_1/IsVariableInitialized_109" + input: "report_uninitialized_variables_1/IsVariableInitialized_110" + input: "report_uninitialized_variables_1/IsVariableInitialized_111" + input: "report_uninitialized_variables_1/IsVariableInitialized_112" + input: "report_uninitialized_variables_1/IsVariableInitialized_113" + input: "report_uninitialized_variables_1/IsVariableInitialized_114" + input: "report_uninitialized_variables_1/IsVariableInitialized_115" + input: "report_uninitialized_variables_1/IsVariableInitialized_116" + input: "report_uninitialized_variables_1/IsVariableInitialized_117" + input: "report_uninitialized_variables_1/IsVariableInitialized_118" + input: "report_uninitialized_variables_1/IsVariableInitialized_119" + input: "report_uninitialized_variables_1/IsVariableInitialized_120" + input: "report_uninitialized_variables_1/IsVariableInitialized_121" + input: "report_uninitialized_variables_1/IsVariableInitialized_122" + input: "report_uninitialized_variables_1/IsVariableInitialized_123" + input: "report_uninitialized_variables_1/IsVariableInitialized_124" + input: "report_uninitialized_variables_1/IsVariableInitialized_125" + input: "report_uninitialized_variables_1/IsVariableInitialized_126" + input: "report_uninitialized_variables_1/IsVariableInitialized_127" + input: "report_uninitialized_variables_1/IsVariableInitialized_128" + input: "report_uninitialized_variables_1/IsVariableInitialized_129" + input: "report_uninitialized_variables_1/IsVariableInitialized_130" + input: "report_uninitialized_variables_1/IsVariableInitialized_131" + input: "report_uninitialized_variables_1/IsVariableInitialized_132" + input: "report_uninitialized_variables_1/IsVariableInitialized_133" + input: "report_uninitialized_variables_1/IsVariableInitialized_134" + input: "report_uninitialized_variables_1/IsVariableInitialized_135" + input: "report_uninitialized_variables_1/IsVariableInitialized_136" + input: "report_uninitialized_variables_1/IsVariableInitialized_137" + input: "report_uninitialized_variables_1/IsVariableInitialized_138" + input: "report_uninitialized_variables_1/IsVariableInitialized_139" + input: "report_uninitialized_variables_1/IsVariableInitialized_140" + input: "report_uninitialized_variables_1/IsVariableInitialized_141" + input: "report_uninitialized_variables_1/IsVariableInitialized_142" + input: "report_uninitialized_variables_1/IsVariableInitialized_143" + input: "report_uninitialized_variables_1/IsVariableInitialized_144" + input: "report_uninitialized_variables_1/IsVariableInitialized_145" + input: "report_uninitialized_variables_1/IsVariableInitialized_146" + input: "report_uninitialized_variables_1/IsVariableInitialized_147" + input: "report_uninitialized_variables_1/IsVariableInitialized_148" + input: "report_uninitialized_variables_1/IsVariableInitialized_149" + input: "report_uninitialized_variables_1/IsVariableInitialized_150" + input: "report_uninitialized_variables_1/IsVariableInitialized_151" + input: "report_uninitialized_variables_1/IsVariableInitialized_152" + input: "report_uninitialized_variables_1/IsVariableInitialized_153" + input: "report_uninitialized_variables_1/IsVariableInitialized_154" + input: "report_uninitialized_variables_1/IsVariableInitialized_155" + input: "report_uninitialized_variables_1/IsVariableInitialized_156" + input: "report_uninitialized_variables_1/IsVariableInitialized_157" + input: "report_uninitialized_variables_1/IsVariableInitialized_158" + input: "report_uninitialized_variables_1/IsVariableInitialized_159" + input: "report_uninitialized_variables_1/IsVariableInitialized_160" + input: "report_uninitialized_variables_1/IsVariableInitialized_161" + input: "report_uninitialized_variables_1/IsVariableInitialized_162" + input: "report_uninitialized_variables_1/IsVariableInitialized_163" + input: "report_uninitialized_variables_1/IsVariableInitialized_164" + input: "report_uninitialized_variables_1/IsVariableInitialized_165" + input: "report_uninitialized_variables_1/IsVariableInitialized_166" + input: "report_uninitialized_variables_1/IsVariableInitialized_167" + input: "report_uninitialized_variables_1/IsVariableInitialized_168" + input: "report_uninitialized_variables_1/IsVariableInitialized_169" + input: "report_uninitialized_variables_1/IsVariableInitialized_170" + input: "report_uninitialized_variables_1/IsVariableInitialized_171" + input: "report_uninitialized_variables_1/IsVariableInitialized_172" + input: "report_uninitialized_variables_1/IsVariableInitialized_173" + input: "report_uninitialized_variables_1/IsVariableInitialized_174" + input: "report_uninitialized_variables_1/IsVariableInitialized_175" + input: "report_uninitialized_variables_1/IsVariableInitialized_176" + input: "report_uninitialized_variables_1/IsVariableInitialized_177" + input: "report_uninitialized_variables_1/IsVariableInitialized_178" + input: "report_uninitialized_variables_1/IsVariableInitialized_179" + input: "report_uninitialized_variables_1/IsVariableInitialized_180" + input: "report_uninitialized_variables_1/IsVariableInitialized_181" + input: "report_uninitialized_variables_1/IsVariableInitialized_182" + input: "report_uninitialized_variables_1/IsVariableInitialized_183" + input: "report_uninitialized_variables_1/IsVariableInitialized_184" + input: "report_uninitialized_variables_1/IsVariableInitialized_185" + input: "report_uninitialized_variables_1/IsVariableInitialized_186" + input: "report_uninitialized_variables_1/IsVariableInitialized_187" + input: "report_uninitialized_variables_1/IsVariableInitialized_188" + input: "report_uninitialized_variables_1/IsVariableInitialized_189" + input: "report_uninitialized_variables_1/IsVariableInitialized_190" + input: "report_uninitialized_variables_1/IsVariableInitialized_191" + input: "report_uninitialized_variables_1/IsVariableInitialized_192" + input: "report_uninitialized_variables_1/IsVariableInitialized_193" + input: "report_uninitialized_variables_1/IsVariableInitialized_194" + input: "report_uninitialized_variables_1/IsVariableInitialized_195" + input: "report_uninitialized_variables_1/IsVariableInitialized_196" + input: "report_uninitialized_variables_1/IsVariableInitialized_197" + input: "report_uninitialized_variables_1/IsVariableInitialized_198" + input: "report_uninitialized_variables_1/IsVariableInitialized_199" + input: "report_uninitialized_variables_1/IsVariableInitialized_200" + input: "report_uninitialized_variables_1/IsVariableInitialized_201" + input: "report_uninitialized_variables_1/IsVariableInitialized_202" + input: "report_uninitialized_variables_1/IsVariableInitialized_203" + input: "report_uninitialized_variables_1/IsVariableInitialized_204" + input: "report_uninitialized_variables_1/IsVariableInitialized_205" + input: "report_uninitialized_variables_1/IsVariableInitialized_206" + input: "report_uninitialized_variables_1/IsVariableInitialized_207" + input: "report_uninitialized_variables_1/IsVariableInitialized_208" + input: "report_uninitialized_variables_1/IsVariableInitialized_209" + input: "report_uninitialized_variables_1/IsVariableInitialized_210" + input: "report_uninitialized_variables_1/IsVariableInitialized_211" + input: "report_uninitialized_variables_1/IsVariableInitialized_212" + input: "report_uninitialized_variables_1/IsVariableInitialized_213" + input: "report_uninitialized_variables_1/IsVariableInitialized_214" + input: "report_uninitialized_variables_1/IsVariableInitialized_215" + input: "report_uninitialized_variables_1/IsVariableInitialized_216" + input: "report_uninitialized_variables_1/IsVariableInitialized_217" + input: "report_uninitialized_variables_1/IsVariableInitialized_218" + input: "report_uninitialized_variables_1/IsVariableInitialized_219" + input: "report_uninitialized_variables_1/IsVariableInitialized_220" + input: "report_uninitialized_variables_1/IsVariableInitialized_221" + input: "report_uninitialized_variables_1/IsVariableInitialized_222" + input: "report_uninitialized_variables_1/IsVariableInitialized_223" + input: "report_uninitialized_variables_1/IsVariableInitialized_224" + input: "report_uninitialized_variables_1/IsVariableInitialized_225" + input: "report_uninitialized_variables_1/IsVariableInitialized_226" + input: "report_uninitialized_variables_1/IsVariableInitialized_227" + input: "report_uninitialized_variables_1/IsVariableInitialized_228" + input: "report_uninitialized_variables_1/IsVariableInitialized_229" + input: "report_uninitialized_variables_1/IsVariableInitialized_230" + input: "report_uninitialized_variables_1/IsVariableInitialized_231" + input: "report_uninitialized_variables_1/IsVariableInitialized_232" + input: "report_uninitialized_variables_1/IsVariableInitialized_233" + input: "report_uninitialized_variables_1/IsVariableInitialized_234" + input: "report_uninitialized_variables_1/IsVariableInitialized_235" + input: "report_uninitialized_variables_1/IsVariableInitialized_236" + input: "report_uninitialized_variables_1/IsVariableInitialized_237" + input: "report_uninitialized_variables_1/IsVariableInitialized_238" + input: "report_uninitialized_variables_1/IsVariableInitialized_239" + input: "report_uninitialized_variables_1/IsVariableInitialized_240" + input: "report_uninitialized_variables_1/IsVariableInitialized_241" + input: "report_uninitialized_variables_1/IsVariableInitialized_242" + input: "report_uninitialized_variables_1/IsVariableInitialized_243" + input: "report_uninitialized_variables_1/IsVariableInitialized_244" + input: "report_uninitialized_variables_1/IsVariableInitialized_245" + input: "report_uninitialized_variables_1/IsVariableInitialized_246" + input: "report_uninitialized_variables_1/IsVariableInitialized_247" + input: "report_uninitialized_variables_1/IsVariableInitialized_248" + input: "report_uninitialized_variables_1/IsVariableInitialized_249" + input: "report_uninitialized_variables_1/IsVariableInitialized_250" + input: "report_uninitialized_variables_1/IsVariableInitialized_251" + input: "report_uninitialized_variables_1/IsVariableInitialized_252" + input: "report_uninitialized_variables_1/IsVariableInitialized_253" + input: "report_uninitialized_variables_1/IsVariableInitialized_254" + input: "report_uninitialized_variables_1/IsVariableInitialized_255" + input: "report_uninitialized_variables_1/IsVariableInitialized_256" + input: "report_uninitialized_variables_1/IsVariableInitialized_257" + input: "report_uninitialized_variables_1/IsVariableInitialized_258" + input: "report_uninitialized_variables_1/IsVariableInitialized_259" + input: "report_uninitialized_variables_1/IsVariableInitialized_260" + input: "report_uninitialized_variables_1/IsVariableInitialized_261" + input: "report_uninitialized_variables_1/IsVariableInitialized_262" + input: "report_uninitialized_variables_1/IsVariableInitialized_263" + input: "report_uninitialized_variables_1/IsVariableInitialized_264" + input: "report_uninitialized_variables_1/IsVariableInitialized_265" + input: "report_uninitialized_variables_1/IsVariableInitialized_266" + input: "report_uninitialized_variables_1/IsVariableInitialized_267" + input: "report_uninitialized_variables_1/IsVariableInitialized_268" + input: "report_uninitialized_variables_1/IsVariableInitialized_269" + input: "report_uninitialized_variables_1/IsVariableInitialized_270" + input: "report_uninitialized_variables_1/IsVariableInitialized_271" + input: "report_uninitialized_variables_1/IsVariableInitialized_272" + input: "report_uninitialized_variables_1/IsVariableInitialized_273" + input: "report_uninitialized_variables_1/IsVariableInitialized_274" + input: "report_uninitialized_variables_1/IsVariableInitialized_275" + input: "report_uninitialized_variables_1/IsVariableInitialized_276" + input: "report_uninitialized_variables_1/IsVariableInitialized_277" + input: "report_uninitialized_variables_1/IsVariableInitialized_278" + input: "report_uninitialized_variables_1/IsVariableInitialized_279" + input: "report_uninitialized_variables_1/IsVariableInitialized_280" + input: "report_uninitialized_variables_1/IsVariableInitialized_281" + input: "report_uninitialized_variables_1/IsVariableInitialized_282" + input: "report_uninitialized_variables_1/IsVariableInitialized_283" + input: "report_uninitialized_variables_1/IsVariableInitialized_284" + input: "report_uninitialized_variables_1/IsVariableInitialized_285" + input: "report_uninitialized_variables_1/IsVariableInitialized_286" + input: "report_uninitialized_variables_1/IsVariableInitialized_287" + input: "report_uninitialized_variables_1/IsVariableInitialized_288" + input: "report_uninitialized_variables_1/IsVariableInitialized_289" + input: "report_uninitialized_variables_1/IsVariableInitialized_290" + input: "report_uninitialized_variables_1/IsVariableInitialized_291" + input: "report_uninitialized_variables_1/IsVariableInitialized_292" + input: "report_uninitialized_variables_1/IsVariableInitialized_293" + input: "report_uninitialized_variables_1/IsVariableInitialized_294" + input: "report_uninitialized_variables_1/IsVariableInitialized_295" + input: "report_uninitialized_variables_1/IsVariableInitialized_296" + input: "report_uninitialized_variables_1/IsVariableInitialized_297" + input: "report_uninitialized_variables_1/IsVariableInitialized_298" + input: "report_uninitialized_variables_1/IsVariableInitialized_299" + input: "report_uninitialized_variables_1/IsVariableInitialized_300" + input: "report_uninitialized_variables_1/IsVariableInitialized_301" + input: "report_uninitialized_variables_1/IsVariableInitialized_302" + input: "report_uninitialized_variables_1/IsVariableInitialized_303" + input: "report_uninitialized_variables_1/IsVariableInitialized_304" + input: "report_uninitialized_variables_1/IsVariableInitialized_305" + input: "report_uninitialized_variables_1/IsVariableInitialized_306" + input: "report_uninitialized_variables_1/IsVariableInitialized_307" + input: "report_uninitialized_variables_1/IsVariableInitialized_308" + input: "report_uninitialized_variables_1/IsVariableInitialized_309" + input: "report_uninitialized_variables_1/IsVariableInitialized_310" + input: "report_uninitialized_variables_1/IsVariableInitialized_311" + input: "report_uninitialized_variables_1/IsVariableInitialized_312" + input: "report_uninitialized_variables_1/IsVariableInitialized_313" + input: "report_uninitialized_variables_1/IsVariableInitialized_314" + input: "report_uninitialized_variables_1/IsVariableInitialized_315" + input: "report_uninitialized_variables_1/IsVariableInitialized_316" + input: "report_uninitialized_variables_1/IsVariableInitialized_317" + input: "report_uninitialized_variables_1/IsVariableInitialized_318" + input: "report_uninitialized_variables_1/IsVariableInitialized_319" + input: "report_uninitialized_variables_1/IsVariableInitialized_320" + input: "report_uninitialized_variables_1/IsVariableInitialized_321" + input: "report_uninitialized_variables_1/IsVariableInitialized_322" + input: "report_uninitialized_variables_1/IsVariableInitialized_323" + input: "report_uninitialized_variables_1/IsVariableInitialized_324" + input: "report_uninitialized_variables_1/IsVariableInitialized_325" + input: "report_uninitialized_variables_1/IsVariableInitialized_326" + input: "report_uninitialized_variables_1/IsVariableInitialized_327" + input: "report_uninitialized_variables_1/IsVariableInitialized_328" + input: "report_uninitialized_variables_1/IsVariableInitialized_329" + input: "report_uninitialized_variables_1/IsVariableInitialized_330" + input: "report_uninitialized_variables_1/IsVariableInitialized_331" + input: "report_uninitialized_variables_1/IsVariableInitialized_332" + input: "report_uninitialized_variables_1/IsVariableInitialized_333" + input: "report_uninitialized_variables_1/IsVariableInitialized_334" + input: "report_uninitialized_variables_1/IsVariableInitialized_335" + input: "report_uninitialized_variables_1/IsVariableInitialized_336" + input: "report_uninitialized_variables_1/IsVariableInitialized_337" + input: "report_uninitialized_variables_1/IsVariableInitialized_338" + input: "report_uninitialized_variables_1/IsVariableInitialized_339" + input: "report_uninitialized_variables_1/IsVariableInitialized_340" + input: "report_uninitialized_variables_1/IsVariableInitialized_341" + input: "report_uninitialized_variables_1/IsVariableInitialized_342" + input: "report_uninitialized_variables_1/IsVariableInitialized_343" + input: "report_uninitialized_variables_1/IsVariableInitialized_344" + input: "report_uninitialized_variables_1/IsVariableInitialized_345" + input: "report_uninitialized_variables_1/IsVariableInitialized_346" + input: "report_uninitialized_variables_1/IsVariableInitialized_347" + input: "report_uninitialized_variables_1/IsVariableInitialized_348" + input: "report_uninitialized_variables_1/IsVariableInitialized_349" + input: "report_uninitialized_variables_1/IsVariableInitialized_350" + input: "report_uninitialized_variables_1/IsVariableInitialized_351" + input: "report_uninitialized_variables_1/IsVariableInitialized_352" + input: "report_uninitialized_variables_1/IsVariableInitialized_353" + input: "report_uninitialized_variables_1/IsVariableInitialized_354" + input: "report_uninitialized_variables_1/IsVariableInitialized_355" + input: "report_uninitialized_variables_1/IsVariableInitialized_356" + input: "report_uninitialized_variables_1/IsVariableInitialized_357" + input: "report_uninitialized_variables_1/IsVariableInitialized_358" + input: "report_uninitialized_variables_1/IsVariableInitialized_359" + input: "report_uninitialized_variables_1/IsVariableInitialized_360" + input: "report_uninitialized_variables_1/IsVariableInitialized_361" + input: "report_uninitialized_variables_1/IsVariableInitialized_362" + input: "report_uninitialized_variables_1/IsVariableInitialized_363" + input: "report_uninitialized_variables_1/IsVariableInitialized_364" + input: "report_uninitialized_variables_1/IsVariableInitialized_365" + input: "report_uninitialized_variables_1/IsVariableInitialized_366" + input: "report_uninitialized_variables_1/IsVariableInitialized_367" + input: "report_uninitialized_variables_1/IsVariableInitialized_368" + input: "report_uninitialized_variables_1/IsVariableInitialized_369" + input: "report_uninitialized_variables_1/IsVariableInitialized_370" + input: "report_uninitialized_variables_1/IsVariableInitialized_371" + input: "report_uninitialized_variables_1/IsVariableInitialized_372" + input: "report_uninitialized_variables_1/IsVariableInitialized_373" + input: "report_uninitialized_variables_1/IsVariableInitialized_374" + input: "report_uninitialized_variables_1/IsVariableInitialized_375" + input: "report_uninitialized_variables_1/IsVariableInitialized_376" + input: "report_uninitialized_variables_1/IsVariableInitialized_377" + input: "report_uninitialized_variables_1/IsVariableInitialized_378" + input: "report_uninitialized_variables_1/IsVariableInitialized_379" + input: "report_uninitialized_variables_1/IsVariableInitialized_380" + input: "report_uninitialized_variables_1/IsVariableInitialized_381" + input: "report_uninitialized_variables_1/IsVariableInitialized_382" + input: "report_uninitialized_variables_1/IsVariableInitialized_383" + input: "report_uninitialized_variables_1/IsVariableInitialized_384" + input: "report_uninitialized_variables_1/IsVariableInitialized_385" + input: "report_uninitialized_variables_1/IsVariableInitialized_386" + input: "report_uninitialized_variables_1/IsVariableInitialized_387" + input: "report_uninitialized_variables_1/IsVariableInitialized_388" + input: "report_uninitialized_variables_1/IsVariableInitialized_389" + input: "report_uninitialized_variables_1/IsVariableInitialized_390" + input: "report_uninitialized_variables_1/IsVariableInitialized_391" + input: "report_uninitialized_variables_1/IsVariableInitialized_392" + input: "report_uninitialized_variables_1/IsVariableInitialized_393" + input: "report_uninitialized_variables_1/IsVariableInitialized_394" + input: "report_uninitialized_variables_1/IsVariableInitialized_395" + input: "report_uninitialized_variables_1/IsVariableInitialized_396" + input: "report_uninitialized_variables_1/IsVariableInitialized_397" + input: "report_uninitialized_variables_1/IsVariableInitialized_398" + input: "report_uninitialized_variables_1/IsVariableInitialized_399" + input: "report_uninitialized_variables_1/IsVariableInitialized_400" + input: "report_uninitialized_variables_1/IsVariableInitialized_401" + input: "report_uninitialized_variables_1/IsVariableInitialized_402" + input: "report_uninitialized_variables_1/IsVariableInitialized_403" + input: "report_uninitialized_variables_1/IsVariableInitialized_404" + input: "report_uninitialized_variables_1/IsVariableInitialized_405" + input: "report_uninitialized_variables_1/IsVariableInitialized_406" + input: "report_uninitialized_variables_1/IsVariableInitialized_407" + input: "report_uninitialized_variables_1/IsVariableInitialized_408" + input: "report_uninitialized_variables_1/IsVariableInitialized_409" + input: "report_uninitialized_variables_1/IsVariableInitialized_410" + input: "report_uninitialized_variables_1/IsVariableInitialized_411" + input: "report_uninitialized_variables_1/IsVariableInitialized_412" + input: "report_uninitialized_variables_1/IsVariableInitialized_413" + input: "report_uninitialized_variables_1/IsVariableInitialized_414" + input: "report_uninitialized_variables_1/IsVariableInitialized_415" + input: "report_uninitialized_variables_1/IsVariableInitialized_416" + input: "report_uninitialized_variables_1/IsVariableInitialized_417" + input: "report_uninitialized_variables_1/IsVariableInitialized_418" + input: "report_uninitialized_variables_1/IsVariableInitialized_419" + input: "report_uninitialized_variables_1/IsVariableInitialized_420" + input: "report_uninitialized_variables_1/IsVariableInitialized_421" + input: "report_uninitialized_variables_1/IsVariableInitialized_422" + input: "report_uninitialized_variables_1/IsVariableInitialized_423" + input: "report_uninitialized_variables_1/IsVariableInitialized_424" + input: "report_uninitialized_variables_1/IsVariableInitialized_425" + input: "report_uninitialized_variables_1/IsVariableInitialized_426" + input: "report_uninitialized_variables_1/IsVariableInitialized_427" + input: "report_uninitialized_variables_1/IsVariableInitialized_428" + input: "report_uninitialized_variables_1/IsVariableInitialized_429" + input: "report_uninitialized_variables_1/IsVariableInitialized_430" + input: "report_uninitialized_variables_1/IsVariableInitialized_431" + input: "report_uninitialized_variables_1/IsVariableInitialized_432" + input: "report_uninitialized_variables_1/IsVariableInitialized_433" + input: "report_uninitialized_variables_1/IsVariableInitialized_434" + input: "report_uninitialized_variables_1/IsVariableInitialized_435" + input: "report_uninitialized_variables_1/IsVariableInitialized_436" + input: "report_uninitialized_variables_1/IsVariableInitialized_437" + input: "report_uninitialized_variables_1/IsVariableInitialized_438" + input: "report_uninitialized_variables_1/IsVariableInitialized_439" + input: "report_uninitialized_variables_1/IsVariableInitialized_440" + input: "report_uninitialized_variables_1/IsVariableInitialized_441" + input: "report_uninitialized_variables_1/IsVariableInitialized_442" + input: "report_uninitialized_variables_1/IsVariableInitialized_443" + input: "report_uninitialized_variables_1/IsVariableInitialized_444" + input: "report_uninitialized_variables_1/IsVariableInitialized_445" + input: "report_uninitialized_variables_1/IsVariableInitialized_446" + input: "report_uninitialized_variables_1/IsVariableInitialized_447" + input: "report_uninitialized_variables_1/IsVariableInitialized_448" + input: "report_uninitialized_variables_1/IsVariableInitialized_449" + input: "report_uninitialized_variables_1/IsVariableInitialized_450" + input: "report_uninitialized_variables_1/IsVariableInitialized_451" + input: "report_uninitialized_variables_1/IsVariableInitialized_452" + input: "report_uninitialized_variables_1/IsVariableInitialized_453" + input: "report_uninitialized_variables_1/IsVariableInitialized_454" + input: "report_uninitialized_variables_1/IsVariableInitialized_455" + input: "report_uninitialized_variables_1/IsVariableInitialized_456" + input: "report_uninitialized_variables_1/IsVariableInitialized_457" + input: "report_uninitialized_variables_1/IsVariableInitialized_458" + input: "report_uninitialized_variables_1/IsVariableInitialized_459" + input: "report_uninitialized_variables_1/IsVariableInitialized_460" + input: "report_uninitialized_variables_1/IsVariableInitialized_461" + input: "report_uninitialized_variables_1/IsVariableInitialized_462" + input: "report_uninitialized_variables_1/IsVariableInitialized_463" + input: "report_uninitialized_variables_1/IsVariableInitialized_464" + input: "report_uninitialized_variables_1/IsVariableInitialized_465" + input: "report_uninitialized_variables_1/IsVariableInitialized_466" + input: "report_uninitialized_variables_1/IsVariableInitialized_467" + input: "report_uninitialized_variables_1/IsVariableInitialized_468" + input: "report_uninitialized_variables_1/IsVariableInitialized_469" + input: "report_uninitialized_variables_1/IsVariableInitialized_470" + input: "report_uninitialized_variables_1/IsVariableInitialized_471" + input: "report_uninitialized_variables_1/IsVariableInitialized_472" + input: "report_uninitialized_variables_1/IsVariableInitialized_473" + input: "report_uninitialized_variables_1/IsVariableInitialized_474" + input: "report_uninitialized_variables_1/IsVariableInitialized_475" + input: "report_uninitialized_variables_1/IsVariableInitialized_476" + input: "report_uninitialized_variables_1/IsVariableInitialized_477" + input: "report_uninitialized_variables_1/IsVariableInitialized_478" + input: "report_uninitialized_variables_1/IsVariableInitialized_479" + input: "report_uninitialized_variables_1/IsVariableInitialized_480" + input: "report_uninitialized_variables_1/IsVariableInitialized_481" + input: "report_uninitialized_variables_1/IsVariableInitialized_482" + input: "report_uninitialized_variables_1/IsVariableInitialized_483" + input: "report_uninitialized_variables_1/IsVariableInitialized_484" + input: "report_uninitialized_variables_1/IsVariableInitialized_485" + input: "report_uninitialized_variables_1/IsVariableInitialized_486" + input: "report_uninitialized_variables_1/IsVariableInitialized_487" + input: "report_uninitialized_variables_1/IsVariableInitialized_488" + input: "report_uninitialized_variables_1/IsVariableInitialized_489" + input: "report_uninitialized_variables_1/IsVariableInitialized_490" + input: "report_uninitialized_variables_1/IsVariableInitialized_491" + input: "report_uninitialized_variables_1/IsVariableInitialized_492" + input: "report_uninitialized_variables_1/IsVariableInitialized_493" + input: "report_uninitialized_variables_1/IsVariableInitialized_494" + input: "report_uninitialized_variables_1/IsVariableInitialized_495" + input: "report_uninitialized_variables_1/IsVariableInitialized_496" + input: "report_uninitialized_variables_1/IsVariableInitialized_497" + input: "report_uninitialized_variables_1/IsVariableInitialized_498" + input: "report_uninitialized_variables_1/IsVariableInitialized_499" + input: "report_uninitialized_variables_1/IsVariableInitialized_500" + input: "report_uninitialized_variables_1/IsVariableInitialized_501" + input: "report_uninitialized_variables_1/IsVariableInitialized_502" + input: "report_uninitialized_variables_1/IsVariableInitialized_503" + input: "report_uninitialized_variables_1/IsVariableInitialized_504" + input: "report_uninitialized_variables_1/IsVariableInitialized_505" + input: "report_uninitialized_variables_1/IsVariableInitialized_506" + input: "report_uninitialized_variables_1/IsVariableInitialized_507" + input: "report_uninitialized_variables_1/IsVariableInitialized_508" + input: "report_uninitialized_variables_1/IsVariableInitialized_509" + input: "report_uninitialized_variables_1/IsVariableInitialized_510" + input: "report_uninitialized_variables_1/IsVariableInitialized_511" + input: "report_uninitialized_variables_1/IsVariableInitialized_512" + input: "report_uninitialized_variables_1/IsVariableInitialized_513" + input: "report_uninitialized_variables_1/IsVariableInitialized_514" + input: "report_uninitialized_variables_1/IsVariableInitialized_515" + input: "report_uninitialized_variables_1/IsVariableInitialized_516" + input: "report_uninitialized_variables_1/IsVariableInitialized_517" + input: "report_uninitialized_variables_1/IsVariableInitialized_518" + input: "report_uninitialized_variables_1/IsVariableInitialized_519" + input: "report_uninitialized_variables_1/IsVariableInitialized_520" + input: "report_uninitialized_variables_1/IsVariableInitialized_521" + input: "report_uninitialized_variables_1/IsVariableInitialized_522" + input: "report_uninitialized_variables_1/IsVariableInitialized_523" + input: "report_uninitialized_variables_1/IsVariableInitialized_524" + input: "report_uninitialized_variables_1/IsVariableInitialized_525" + input: "report_uninitialized_variables_1/IsVariableInitialized_526" + input: "report_uninitialized_variables_1/IsVariableInitialized_527" + input: "report_uninitialized_variables_1/IsVariableInitialized_528" + input: "report_uninitialized_variables_1/IsVariableInitialized_529" + input: "report_uninitialized_variables_1/IsVariableInitialized_530" + input: "report_uninitialized_variables_1/IsVariableInitialized_531" + input: "report_uninitialized_variables_1/IsVariableInitialized_532" + input: "report_uninitialized_variables_1/IsVariableInitialized_533" + input: "report_uninitialized_variables_1/IsVariableInitialized_534" + input: "report_uninitialized_variables_1/IsVariableInitialized_535" + input: "report_uninitialized_variables_1/IsVariableInitialized_536" + input: "report_uninitialized_variables_1/IsVariableInitialized_537" + input: "report_uninitialized_variables_1/IsVariableInitialized_538" + input: "report_uninitialized_variables_1/IsVariableInitialized_539" + input: "report_uninitialized_variables_1/IsVariableInitialized_540" + input: "report_uninitialized_variables_1/IsVariableInitialized_541" + input: "report_uninitialized_variables_1/IsVariableInitialized_542" + input: "report_uninitialized_variables_1/IsVariableInitialized_543" + input: "report_uninitialized_variables_1/IsVariableInitialized_544" + input: "report_uninitialized_variables_1/IsVariableInitialized_545" + input: "report_uninitialized_variables_1/IsVariableInitialized_546" + input: "report_uninitialized_variables_1/IsVariableInitialized_547" + input: "report_uninitialized_variables_1/IsVariableInitialized_548" + input: "report_uninitialized_variables_1/IsVariableInitialized_549" + input: "report_uninitialized_variables_1/IsVariableInitialized_550" + input: "report_uninitialized_variables_1/IsVariableInitialized_551" + input: "report_uninitialized_variables_1/IsVariableInitialized_552" + input: "report_uninitialized_variables_1/IsVariableInitialized_553" + input: "report_uninitialized_variables_1/IsVariableInitialized_554" + input: "report_uninitialized_variables_1/IsVariableInitialized_555" + input: "report_uninitialized_variables_1/IsVariableInitialized_556" + input: "report_uninitialized_variables_1/IsVariableInitialized_557" + input: "report_uninitialized_variables_1/IsVariableInitialized_558" + input: "report_uninitialized_variables_1/IsVariableInitialized_559" + input: "report_uninitialized_variables_1/IsVariableInitialized_560" + input: "report_uninitialized_variables_1/IsVariableInitialized_561" + input: "report_uninitialized_variables_1/IsVariableInitialized_562" + input: "report_uninitialized_variables_1/IsVariableInitialized_563" + input: "report_uninitialized_variables_1/IsVariableInitialized_564" + input: "report_uninitialized_variables_1/IsVariableInitialized_565" + input: "report_uninitialized_variables_1/IsVariableInitialized_566" + input: "report_uninitialized_variables_1/IsVariableInitialized_567" + input: "report_uninitialized_variables_1/IsVariableInitialized_568" + input: "report_uninitialized_variables_1/IsVariableInitialized_569" + input: "report_uninitialized_variables_1/IsVariableInitialized_570" + input: "report_uninitialized_variables_1/IsVariableInitialized_571" + input: "report_uninitialized_variables_1/IsVariableInitialized_572" + input: "report_uninitialized_variables_1/IsVariableInitialized_573" + input: "report_uninitialized_variables_1/IsVariableInitialized_574" + input: "report_uninitialized_variables_1/IsVariableInitialized_575" + input: "report_uninitialized_variables_1/IsVariableInitialized_576" + input: "report_uninitialized_variables_1/IsVariableInitialized_577" + input: "report_uninitialized_variables_1/IsVariableInitialized_578" + input: "report_uninitialized_variables_1/IsVariableInitialized_579" + input: "report_uninitialized_variables_1/IsVariableInitialized_580" + input: "report_uninitialized_variables_1/IsVariableInitialized_581" + input: "report_uninitialized_variables_1/IsVariableInitialized_582" + input: "report_uninitialized_variables_1/IsVariableInitialized_583" + input: "report_uninitialized_variables_1/IsVariableInitialized_584" + input: "report_uninitialized_variables_1/IsVariableInitialized_585" + input: "report_uninitialized_variables_1/IsVariableInitialized_586" + input: "report_uninitialized_variables_1/IsVariableInitialized_587" + input: "report_uninitialized_variables_1/IsVariableInitialized_588" + input: "report_uninitialized_variables_1/IsVariableInitialized_589" + input: "report_uninitialized_variables_1/IsVariableInitialized_590" + input: "report_uninitialized_variables_1/IsVariableInitialized_591" + input: "report_uninitialized_variables_1/IsVariableInitialized_592" + input: "report_uninitialized_variables_1/IsVariableInitialized_593" + input: "report_uninitialized_variables_1/IsVariableInitialized_594" + input: "report_uninitialized_variables_1/IsVariableInitialized_595" + input: "report_uninitialized_variables_1/IsVariableInitialized_596" + input: "report_uninitialized_variables_1/IsVariableInitialized_597" + input: "report_uninitialized_variables_1/IsVariableInitialized_598" + input: "report_uninitialized_variables_1/IsVariableInitialized_599" + input: "report_uninitialized_variables_1/IsVariableInitialized_600" + input: "report_uninitialized_variables_1/IsVariableInitialized_601" + input: "report_uninitialized_variables_1/IsVariableInitialized_602" + input: "report_uninitialized_variables_1/IsVariableInitialized_603" + input: "report_uninitialized_variables_1/IsVariableInitialized_604" + input: "report_uninitialized_variables_1/IsVariableInitialized_605" + input: "report_uninitialized_variables_1/IsVariableInitialized_606" + input: "report_uninitialized_variables_1/IsVariableInitialized_607" + input: "report_uninitialized_variables_1/IsVariableInitialized_608" + input: "report_uninitialized_variables_1/IsVariableInitialized_609" + input: "report_uninitialized_variables_1/IsVariableInitialized_610" + input: "report_uninitialized_variables_1/IsVariableInitialized_611" + input: "report_uninitialized_variables_1/IsVariableInitialized_612" + input: "report_uninitialized_variables_1/IsVariableInitialized_613" + input: "report_uninitialized_variables_1/IsVariableInitialized_614" + input: "report_uninitialized_variables_1/IsVariableInitialized_615" + input: "report_uninitialized_variables_1/IsVariableInitialized_616" + input: "report_uninitialized_variables_1/IsVariableInitialized_617" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 619 + } + } + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/LogicalNot" + op: "LogicalNot" + input: "report_uninitialized_variables_1/stack" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 619 + } + } + string_val: "global_step" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: "bert/encoder/layer_2/attention/self/query/bias" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: "bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/bias" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/output_bias" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_bias" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 619 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Prod" + op: "Prod" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice" + input: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 619 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 619 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape_2" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat/values_1" + op: "Pack" + input: "report_uninitialized_variables_1/boolean_mask/Prod" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat" + op: "ConcatV2" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1" + input: "report_uninitialized_variables_1/boolean_mask/concat/values_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2" + input: "report_uninitialized_variables_1/boolean_mask/concat/axis" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape" + op: "Reshape" + input: "report_uninitialized_variables_1/Const" + input: "report_uninitialized_variables_1/boolean_mask/concat" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape_1" + op: "Reshape" + input: "report_uninitialized_variables_1/LogicalNot" + input: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Where" + op: "Where" + input: "report_uninitialized_variables_1/boolean_mask/Reshape_1" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Squeeze" + op: "Squeeze" + input: "report_uninitialized_variables_1/boolean_mask/Where" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/GatherV2" + op: "GatherV2" + input: "report_uninitialized_variables_1/boolean_mask/Reshape" + input: "report_uninitialized_variables_1/boolean_mask/Squeeze" + input: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis" + device: "/device:CPU:0" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT64 + } + } + attr { + key: "Tparams" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "init_2" + op: "NoOp" +} +node { + name: "init_all_tables" + op: "NoOp" +} +node { + name: "init_3" + op: "NoOp" +} +node { + name: "group_deps_3" + op: "NoOp" + input: "^init_2" + input: "^init_3" + input: "^init_all_tables" +} +node { + name: "Merge/MergeSummary" + op: "MergeSummary" + input: "loss" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "model" + } + } + } +} +node { + name: "save/StringJoin/inputs_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_43aab69cd23d4d6a8dac3b1098a2da70/part" + } + } + } +} +node { + name: "save/StringJoin" + op: "StringJoin" + input: "save/Const" + input: "save/StringJoin/inputs_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "separator" + value { + s: "" + } + } +} +node { + name: "save/num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "save/ShardedFilename/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "save/ShardedFilename" + op: "ShardedFilename" + input: "save/StringJoin" + input: "save/ShardedFilename/shard" + input: "save/num_shards" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/SaveV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 619 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "cls/predictions/output_bias" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/seq_relationship/output_bias" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "global_step" + } + } + } +} +node { + name: "save/SaveV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 619 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } +} +node { + name: "save/SaveV2" + op: "SaveV2" + input: "save/ShardedFilename" + input: "save/SaveV2/tensor_names" + input: "save/SaveV2/shape_and_slices" + input: "bert/embeddings/LayerNorm/beta" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "bert/embeddings/LayerNorm/gamma" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "bert/embeddings/position_embeddings" + input: "bert/embeddings/position_embeddings/adam_m" + input: "bert/embeddings/position_embeddings/adam_v" + input: "bert/embeddings/token_type_embeddings" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "bert/embeddings/word_embeddings" + input: "bert/embeddings/word_embeddings/adam_m" + input: "bert/embeddings/word_embeddings/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_0/output/dense/bias" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_1/output/dense/bias" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_10/output/dense/bias" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_11/output/dense/bias" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_2/output/dense/bias" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_3/output/dense/bias" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_4/output/dense/bias" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_5/output/dense/bias" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_6/output/dense/bias" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_7/output/dense/bias" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_8/output/dense/bias" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "bert/encoder/layer_9/output/dense/bias" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "bert/pooler/dense/bias" + input: "bert/pooler/dense/bias/adam_m" + input: "bert/pooler/dense/bias/adam_v" + input: "bert/pooler/dense/kernel" + input: "bert/pooler/dense/kernel/adam_m" + input: "bert/pooler/dense/kernel/adam_v" + input: "cls/predictions/output_bias" + input: "cls/predictions/output_bias/adam_m" + input: "cls/predictions/output_bias/adam_v" + input: "cls/predictions/transform/LayerNorm/beta" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "cls/predictions/transform/dense/bias" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "cls/predictions/transform/dense/kernel" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "cls/seq_relationship/output_bias" + input: "cls/seq_relationship/output_bias/adam_m" + input: "cls/seq_relationship/output_bias/adam_v" + input: "cls/seq_relationship/output_weights" + input: "cls/seq_relationship/output_weights/adam_m" + input: "cls/seq_relationship/output_weights/adam_v" + input: "global_step/Read/ReadVariableOp" + device: "/device:CPU:0" + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + } + } + } +} +node { + name: "save/control_dependency" + op: "Identity" + input: "save/ShardedFilename" + input: "^save/SaveV2" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@save/ShardedFilename" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "save/ShardedFilename" + input: "^save/control_dependency" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "save/MergeV2Checkpoints" + op: "MergeV2Checkpoints" + input: "save/MergeV2Checkpoints/checkpoint_prefixes" + input: "save/Const" + device: "/device:CPU:0" + attr { + key: "delete_old_dirs" + value { + b: true + } + } +} +node { + name: "save/Identity" + op: "Identity" + input: "save/Const" + input: "^save/MergeV2Checkpoints" + input: "^save/control_dependency" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/RestoreV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 619 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/bias" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/bias" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/key/kernel" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/bias" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/query/kernel" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/bias" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_0/attention/self/value/kernel" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/bias" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_0/output/dense/bias" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_0/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_0/output/dense/kernel" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_0/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/bias" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/bias" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/key/kernel" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/bias" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/query/kernel" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/bias" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_1/attention/self/value/kernel" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/bias" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_1/output/dense/bias" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_1/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_1/output/dense/kernel" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/bias" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/bias" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/key/kernel" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/bias" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/query/kernel" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/bias" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_10/attention/self/value/kernel" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/bias" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_10/output/dense/bias" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_10/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_10/output/dense/kernel" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_10/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/bias" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/bias" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/key/kernel" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/bias" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/query/kernel" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/bias" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_11/attention/self/value/kernel" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/bias" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_11/output/dense/bias" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_11/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_11/output/dense/kernel" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_11/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/bias" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/bias" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/key/kernel" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/bias" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/query/kernel" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/bias" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_2/attention/self/value/kernel" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/bias" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_2/output/dense/bias" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_2/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_2/output/dense/kernel" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_2/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/bias" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/bias" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/key/kernel" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/bias" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/query/kernel" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/bias" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_3/attention/self/value/kernel" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/bias" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_3/output/dense/bias" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_3/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_3/output/dense/kernel" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_3/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/bias" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/bias" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/key/kernel" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/bias" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/query/kernel" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/bias" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_4/attention/self/value/kernel" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/bias" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_4/output/dense/bias" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_4/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_4/output/dense/kernel" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_4/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/bias" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/bias" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/key/kernel" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/bias" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/query/kernel" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/bias" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_5/attention/self/value/kernel" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/bias" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_5/output/dense/bias" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_5/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_5/output/dense/kernel" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_5/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/bias" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/bias" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/key/kernel" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/bias" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/query/kernel" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/bias" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_6/attention/self/value/kernel" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/bias" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_6/output/dense/bias" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_6/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_6/output/dense/kernel" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_6/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/bias" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/bias" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/key/kernel" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/bias" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/query/kernel" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/bias" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_7/attention/self/value/kernel" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/bias" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_7/output/dense/bias" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_7/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_7/output/dense/kernel" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_7/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/bias" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/bias" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/key/kernel" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/bias" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/query/kernel" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/bias" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_8/attention/self/value/kernel" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/bias" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_8/output/dense/bias" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_8/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_8/output/dense/kernel" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_8/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/bias" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/bias" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/key/kernel" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/bias" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/query/kernel" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/bias" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + string_val: "bert/encoder/layer_9/attention/self/value/kernel" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + string_val: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/bias" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/layer_9/output/dense/bias" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_m" + string_val: "bert/encoder/layer_9/output/dense/bias/adam_v" + string_val: "bert/encoder/layer_9/output/dense/kernel" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_m" + string_val: "bert/encoder/layer_9/output/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "cls/predictions/output_bias" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/seq_relationship/output_bias" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "global_step" + } + } + } +} +node { + name: "save/RestoreV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 619 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 619 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } +} +node { + name: "save/RestoreV2" + op: "RestoreV2" + input: "save/Const" + input: "save/RestoreV2/tensor_names" + input: "save/RestoreV2/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + } + } + } +} +node { + name: "save/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "save/RestoreV2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_1" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "save/RestoreV2:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_2" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "save/RestoreV2:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_3" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "save/RestoreV2:3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_4" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_5" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_6" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "save/RestoreV2:6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_7" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "save/RestoreV2:7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_8" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "save/RestoreV2:8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_9" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "save/RestoreV2:9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_10" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "save/RestoreV2:10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_11" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "save/RestoreV2:11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_12" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "save/RestoreV2:12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_13" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "save/RestoreV2:13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_14" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "save/RestoreV2:14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_15" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta" + input: "save/RestoreV2:15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_16" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_17" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_18" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_19" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_20" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_21" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias" + input: "save/RestoreV2:21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_22" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_23" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_24" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel" + input: "save/RestoreV2:24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_25" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_26" + op: "Assign" + input: "bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_27" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias" + input: "save/RestoreV2:27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_28" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_m" + input: "save/RestoreV2:28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_29" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/bias/adam_v" + input: "save/RestoreV2:29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_30" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel" + input: "save/RestoreV2:30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_31" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_32" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_33" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias" + input: "save/RestoreV2:33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_34" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_m" + input: "save/RestoreV2:34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_35" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/bias/adam_v" + input: "save/RestoreV2:35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_36" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel" + input: "save/RestoreV2:36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_37" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_38" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_39" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias" + input: "save/RestoreV2:39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_40" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_m" + input: "save/RestoreV2:40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_41" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/bias/adam_v" + input: "save/RestoreV2:41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_42" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel" + input: "save/RestoreV2:42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_43" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_44" + op: "Assign" + input: "bert/encoder/layer_0/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_45" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias" + input: "save/RestoreV2:45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_46" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_47" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_48" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel" + input: "save/RestoreV2:48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_49" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_50" + op: "Assign" + input: "bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_51" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta" + input: "save/RestoreV2:51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_52" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_53" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_54" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma" + input: "save/RestoreV2:54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_55" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_56" + op: "Assign" + input: "bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_57" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias" + input: "save/RestoreV2:57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_58" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_m" + input: "save/RestoreV2:58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_59" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/bias/adam_v" + input: "save/RestoreV2:59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_60" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel" + input: "save/RestoreV2:60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_61" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_m" + input: "save/RestoreV2:61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_62" + op: "Assign" + input: "bert/encoder/layer_0/output/dense/kernel/adam_v" + input: "save/RestoreV2:62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_0/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_63" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta" + input: "save/RestoreV2:63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_64" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_65" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_66" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_67" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_68" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_69" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias" + input: "save/RestoreV2:69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_70" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_71" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_72" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel" + input: "save/RestoreV2:72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_73" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_74" + op: "Assign" + input: "bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_75" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias" + input: "save/RestoreV2:75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_76" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_m" + input: "save/RestoreV2:76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_77" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/bias/adam_v" + input: "save/RestoreV2:77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_78" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel" + input: "save/RestoreV2:78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_79" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_80" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_81" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias" + input: "save/RestoreV2:81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_82" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_m" + input: "save/RestoreV2:82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_83" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/bias/adam_v" + input: "save/RestoreV2:83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_84" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel" + input: "save/RestoreV2:84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_85" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_86" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_87" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias" + input: "save/RestoreV2:87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_88" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_m" + input: "save/RestoreV2:88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_89" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/bias/adam_v" + input: "save/RestoreV2:89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_90" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel" + input: "save/RestoreV2:90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_91" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_92" + op: "Assign" + input: "bert/encoder/layer_1/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_93" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias" + input: "save/RestoreV2:93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_94" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_95" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_96" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel" + input: "save/RestoreV2:96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_97" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_98" + op: "Assign" + input: "bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_99" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta" + input: "save/RestoreV2:99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_100" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_101" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_102" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma" + input: "save/RestoreV2:102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_103" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_104" + op: "Assign" + input: "bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_105" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias" + input: "save/RestoreV2:105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_106" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_m" + input: "save/RestoreV2:106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_107" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/bias/adam_v" + input: "save/RestoreV2:107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_108" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel" + input: "save/RestoreV2:108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_109" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_m" + input: "save/RestoreV2:109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_110" + op: "Assign" + input: "bert/encoder/layer_1/output/dense/kernel/adam_v" + input: "save/RestoreV2:110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_111" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta" + input: "save/RestoreV2:111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_112" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:112" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_113" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_114" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:114" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_115" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_116" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:116" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_117" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias" + input: "save/RestoreV2:117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_118" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_119" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:119" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_120" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel" + input: "save/RestoreV2:120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_121" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:121" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_122" + op: "Assign" + input: "bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_123" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias" + input: "save/RestoreV2:123" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_124" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_m" + input: "save/RestoreV2:124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_125" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/bias/adam_v" + input: "save/RestoreV2:125" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_126" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel" + input: "save/RestoreV2:126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_127" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:127" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_128" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_129" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias" + input: "save/RestoreV2:129" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_130" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_m" + input: "save/RestoreV2:130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_131" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/bias/adam_v" + input: "save/RestoreV2:131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_132" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel" + input: "save/RestoreV2:132" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_133" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_134" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:134" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_135" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias" + input: "save/RestoreV2:135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_136" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_m" + input: "save/RestoreV2:136" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_137" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/bias/adam_v" + input: "save/RestoreV2:137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_138" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel" + input: "save/RestoreV2:138" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_139" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_140" + op: "Assign" + input: "bert/encoder/layer_10/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:140" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_141" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias" + input: "save/RestoreV2:141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_142" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:142" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_143" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_144" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel" + input: "save/RestoreV2:144" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_145" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_146" + op: "Assign" + input: "bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:146" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_147" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta" + input: "save/RestoreV2:147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_148" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:148" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_149" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_150" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma" + input: "save/RestoreV2:150" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_151" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_152" + op: "Assign" + input: "bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:152" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_153" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias" + input: "save/RestoreV2:153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_154" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_m" + input: "save/RestoreV2:154" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_155" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/bias/adam_v" + input: "save/RestoreV2:155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_156" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel" + input: "save/RestoreV2:156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_157" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_m" + input: "save/RestoreV2:157" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_158" + op: "Assign" + input: "bert/encoder/layer_10/output/dense/kernel/adam_v" + input: "save/RestoreV2:158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_10/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_159" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta" + input: "save/RestoreV2:159" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_160" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_161" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:161" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_162" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_163" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:163" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_164" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_165" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias" + input: "save/RestoreV2:165" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_166" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_167" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:167" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_168" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel" + input: "save/RestoreV2:168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_169" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_170" + op: "Assign" + input: "bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:170" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_171" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias" + input: "save/RestoreV2:171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_172" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_m" + input: "save/RestoreV2:172" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_173" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/bias/adam_v" + input: "save/RestoreV2:173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_174" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel" + input: "save/RestoreV2:174" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_175" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_176" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:176" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_177" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias" + input: "save/RestoreV2:177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_178" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_m" + input: "save/RestoreV2:178" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_179" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/bias/adam_v" + input: "save/RestoreV2:179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_180" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel" + input: "save/RestoreV2:180" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_181" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_182" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:182" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_183" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias" + input: "save/RestoreV2:183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_184" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_m" + input: "save/RestoreV2:184" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_185" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/bias/adam_v" + input: "save/RestoreV2:185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_186" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel" + input: "save/RestoreV2:186" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_187" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_188" + op: "Assign" + input: "bert/encoder/layer_11/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:188" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_189" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias" + input: "save/RestoreV2:189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_190" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:190" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_191" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_192" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel" + input: "save/RestoreV2:192" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_193" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_194" + op: "Assign" + input: "bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:194" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_195" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta" + input: "save/RestoreV2:195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_196" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:196" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_197" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_198" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma" + input: "save/RestoreV2:198" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_199" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_200" + op: "Assign" + input: "bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_201" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias" + input: "save/RestoreV2:201" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_202" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_m" + input: "save/RestoreV2:202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_203" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/bias/adam_v" + input: "save/RestoreV2:203" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_204" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel" + input: "save/RestoreV2:204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_205" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_m" + input: "save/RestoreV2:205" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_206" + op: "Assign" + input: "bert/encoder/layer_11/output/dense/kernel/adam_v" + input: "save/RestoreV2:206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_11/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_207" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta" + input: "save/RestoreV2:207" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_208" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:208" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_209" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:209" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_210" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:210" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_211" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:211" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_212" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:212" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_213" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias" + input: "save/RestoreV2:213" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_214" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:214" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_215" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:215" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_216" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel" + input: "save/RestoreV2:216" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_217" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:217" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_218" + op: "Assign" + input: "bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:218" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_219" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias" + input: "save/RestoreV2:219" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_220" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_m" + input: "save/RestoreV2:220" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_221" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/bias/adam_v" + input: "save/RestoreV2:221" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_222" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel" + input: "save/RestoreV2:222" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_223" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:223" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_224" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:224" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_225" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias" + input: "save/RestoreV2:225" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_226" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_m" + input: "save/RestoreV2:226" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_227" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/bias/adam_v" + input: "save/RestoreV2:227" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_228" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel" + input: "save/RestoreV2:228" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_229" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:229" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_230" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:230" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_231" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias" + input: "save/RestoreV2:231" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_232" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_m" + input: "save/RestoreV2:232" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_233" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/bias/adam_v" + input: "save/RestoreV2:233" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_234" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel" + input: "save/RestoreV2:234" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_235" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:235" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_236" + op: "Assign" + input: "bert/encoder/layer_2/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:236" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_237" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias" + input: "save/RestoreV2:237" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_238" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:238" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_239" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:239" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_240" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel" + input: "save/RestoreV2:240" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_241" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:241" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_242" + op: "Assign" + input: "bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:242" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_243" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta" + input: "save/RestoreV2:243" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_244" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:244" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_245" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:245" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_246" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma" + input: "save/RestoreV2:246" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_247" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:247" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_248" + op: "Assign" + input: "bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:248" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_249" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias" + input: "save/RestoreV2:249" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_250" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_m" + input: "save/RestoreV2:250" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_251" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/bias/adam_v" + input: "save/RestoreV2:251" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_252" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel" + input: "save/RestoreV2:252" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_253" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_m" + input: "save/RestoreV2:253" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_254" + op: "Assign" + input: "bert/encoder/layer_2/output/dense/kernel/adam_v" + input: "save/RestoreV2:254" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_2/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_255" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta" + input: "save/RestoreV2:255" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_256" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:256" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_257" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:257" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_258" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:258" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_259" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:259" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_260" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:260" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_261" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias" + input: "save/RestoreV2:261" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_262" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:262" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_263" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:263" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_264" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel" + input: "save/RestoreV2:264" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_265" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:265" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_266" + op: "Assign" + input: "bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:266" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_267" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias" + input: "save/RestoreV2:267" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_268" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_m" + input: "save/RestoreV2:268" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_269" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/bias/adam_v" + input: "save/RestoreV2:269" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_270" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel" + input: "save/RestoreV2:270" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_271" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:271" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_272" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:272" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_273" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias" + input: "save/RestoreV2:273" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_274" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_m" + input: "save/RestoreV2:274" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_275" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/bias/adam_v" + input: "save/RestoreV2:275" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_276" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel" + input: "save/RestoreV2:276" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_277" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:277" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_278" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:278" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_279" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias" + input: "save/RestoreV2:279" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_280" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_m" + input: "save/RestoreV2:280" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_281" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/bias/adam_v" + input: "save/RestoreV2:281" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_282" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel" + input: "save/RestoreV2:282" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_283" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:283" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_284" + op: "Assign" + input: "bert/encoder/layer_3/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:284" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_285" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias" + input: "save/RestoreV2:285" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_286" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:286" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_287" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:287" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_288" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel" + input: "save/RestoreV2:288" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_289" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:289" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_290" + op: "Assign" + input: "bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:290" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_291" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta" + input: "save/RestoreV2:291" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_292" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:292" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_293" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:293" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_294" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma" + input: "save/RestoreV2:294" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_295" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:295" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_296" + op: "Assign" + input: "bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:296" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_297" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias" + input: "save/RestoreV2:297" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_298" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_m" + input: "save/RestoreV2:298" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_299" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/bias/adam_v" + input: "save/RestoreV2:299" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_300" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel" + input: "save/RestoreV2:300" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_301" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_m" + input: "save/RestoreV2:301" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_302" + op: "Assign" + input: "bert/encoder/layer_3/output/dense/kernel/adam_v" + input: "save/RestoreV2:302" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_3/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_303" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta" + input: "save/RestoreV2:303" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_304" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:304" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_305" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:305" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_306" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:306" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_307" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:307" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_308" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:308" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_309" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias" + input: "save/RestoreV2:309" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_310" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:310" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_311" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:311" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_312" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel" + input: "save/RestoreV2:312" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_313" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:313" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_314" + op: "Assign" + input: "bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:314" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_315" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias" + input: "save/RestoreV2:315" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_316" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_m" + input: "save/RestoreV2:316" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_317" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/bias/adam_v" + input: "save/RestoreV2:317" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_318" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel" + input: "save/RestoreV2:318" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_319" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:319" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_320" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:320" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_321" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias" + input: "save/RestoreV2:321" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_322" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_m" + input: "save/RestoreV2:322" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_323" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/bias/adam_v" + input: "save/RestoreV2:323" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_324" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel" + input: "save/RestoreV2:324" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_325" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:325" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_326" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:326" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_327" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias" + input: "save/RestoreV2:327" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_328" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_m" + input: "save/RestoreV2:328" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_329" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/bias/adam_v" + input: "save/RestoreV2:329" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_330" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel" + input: "save/RestoreV2:330" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_331" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:331" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_332" + op: "Assign" + input: "bert/encoder/layer_4/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:332" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_333" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias" + input: "save/RestoreV2:333" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_334" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:334" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_335" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:335" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_336" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel" + input: "save/RestoreV2:336" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_337" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:337" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_338" + op: "Assign" + input: "bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:338" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_339" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta" + input: "save/RestoreV2:339" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_340" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:340" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_341" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:341" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_342" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma" + input: "save/RestoreV2:342" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_343" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:343" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_344" + op: "Assign" + input: "bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:344" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_345" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias" + input: "save/RestoreV2:345" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_346" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_m" + input: "save/RestoreV2:346" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_347" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/bias/adam_v" + input: "save/RestoreV2:347" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_348" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel" + input: "save/RestoreV2:348" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_349" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_m" + input: "save/RestoreV2:349" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_350" + op: "Assign" + input: "bert/encoder/layer_4/output/dense/kernel/adam_v" + input: "save/RestoreV2:350" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_4/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_351" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta" + input: "save/RestoreV2:351" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_352" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:352" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_353" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:353" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_354" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:354" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_355" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:355" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_356" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:356" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_357" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias" + input: "save/RestoreV2:357" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_358" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:358" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_359" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:359" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_360" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel" + input: "save/RestoreV2:360" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_361" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:361" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_362" + op: "Assign" + input: "bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:362" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_363" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias" + input: "save/RestoreV2:363" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_364" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_m" + input: "save/RestoreV2:364" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_365" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/bias/adam_v" + input: "save/RestoreV2:365" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_366" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel" + input: "save/RestoreV2:366" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_367" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:367" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_368" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:368" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_369" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias" + input: "save/RestoreV2:369" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_370" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_m" + input: "save/RestoreV2:370" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_371" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/bias/adam_v" + input: "save/RestoreV2:371" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_372" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel" + input: "save/RestoreV2:372" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_373" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:373" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_374" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:374" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_375" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias" + input: "save/RestoreV2:375" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_376" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_m" + input: "save/RestoreV2:376" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_377" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/bias/adam_v" + input: "save/RestoreV2:377" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_378" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel" + input: "save/RestoreV2:378" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_379" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:379" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_380" + op: "Assign" + input: "bert/encoder/layer_5/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:380" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_381" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias" + input: "save/RestoreV2:381" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_382" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:382" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_383" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:383" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_384" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel" + input: "save/RestoreV2:384" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_385" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:385" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_386" + op: "Assign" + input: "bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:386" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_387" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta" + input: "save/RestoreV2:387" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_388" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:388" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_389" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:389" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_390" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma" + input: "save/RestoreV2:390" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_391" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:391" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_392" + op: "Assign" + input: "bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:392" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_393" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias" + input: "save/RestoreV2:393" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_394" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_m" + input: "save/RestoreV2:394" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_395" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/bias/adam_v" + input: "save/RestoreV2:395" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_396" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel" + input: "save/RestoreV2:396" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_397" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_m" + input: "save/RestoreV2:397" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_398" + op: "Assign" + input: "bert/encoder/layer_5/output/dense/kernel/adam_v" + input: "save/RestoreV2:398" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_5/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_399" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta" + input: "save/RestoreV2:399" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_400" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:400" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_401" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:401" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_402" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:402" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_403" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:403" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_404" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:404" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_405" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias" + input: "save/RestoreV2:405" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_406" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:406" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_407" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:407" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_408" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel" + input: "save/RestoreV2:408" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_409" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:409" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_410" + op: "Assign" + input: "bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:410" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_411" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias" + input: "save/RestoreV2:411" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_412" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_m" + input: "save/RestoreV2:412" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_413" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/bias/adam_v" + input: "save/RestoreV2:413" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_414" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel" + input: "save/RestoreV2:414" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_415" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:415" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_416" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:416" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_417" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias" + input: "save/RestoreV2:417" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_418" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_m" + input: "save/RestoreV2:418" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_419" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/bias/adam_v" + input: "save/RestoreV2:419" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_420" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel" + input: "save/RestoreV2:420" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_421" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:421" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_422" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:422" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_423" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias" + input: "save/RestoreV2:423" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_424" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_m" + input: "save/RestoreV2:424" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_425" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/bias/adam_v" + input: "save/RestoreV2:425" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_426" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel" + input: "save/RestoreV2:426" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_427" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:427" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_428" + op: "Assign" + input: "bert/encoder/layer_6/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:428" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_429" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias" + input: "save/RestoreV2:429" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_430" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:430" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_431" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:431" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_432" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel" + input: "save/RestoreV2:432" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_433" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:433" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_434" + op: "Assign" + input: "bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:434" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_435" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta" + input: "save/RestoreV2:435" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_436" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:436" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_437" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:437" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_438" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma" + input: "save/RestoreV2:438" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_439" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:439" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_440" + op: "Assign" + input: "bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:440" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_441" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias" + input: "save/RestoreV2:441" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_442" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_m" + input: "save/RestoreV2:442" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_443" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/bias/adam_v" + input: "save/RestoreV2:443" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_444" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel" + input: "save/RestoreV2:444" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_445" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_m" + input: "save/RestoreV2:445" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_446" + op: "Assign" + input: "bert/encoder/layer_6/output/dense/kernel/adam_v" + input: "save/RestoreV2:446" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_6/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_447" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta" + input: "save/RestoreV2:447" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_448" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:448" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_449" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:449" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_450" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:450" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_451" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:451" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_452" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:452" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_453" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias" + input: "save/RestoreV2:453" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_454" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:454" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_455" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:455" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_456" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel" + input: "save/RestoreV2:456" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_457" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:457" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_458" + op: "Assign" + input: "bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:458" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_459" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias" + input: "save/RestoreV2:459" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_460" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_m" + input: "save/RestoreV2:460" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_461" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/bias/adam_v" + input: "save/RestoreV2:461" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_462" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel" + input: "save/RestoreV2:462" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_463" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:463" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_464" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:464" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_465" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias" + input: "save/RestoreV2:465" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_466" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_m" + input: "save/RestoreV2:466" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_467" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/bias/adam_v" + input: "save/RestoreV2:467" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_468" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel" + input: "save/RestoreV2:468" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_469" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:469" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_470" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:470" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_471" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias" + input: "save/RestoreV2:471" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_472" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_m" + input: "save/RestoreV2:472" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_473" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/bias/adam_v" + input: "save/RestoreV2:473" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_474" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel" + input: "save/RestoreV2:474" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_475" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:475" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_476" + op: "Assign" + input: "bert/encoder/layer_7/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:476" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_477" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias" + input: "save/RestoreV2:477" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_478" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:478" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_479" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:479" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_480" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel" + input: "save/RestoreV2:480" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_481" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:481" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_482" + op: "Assign" + input: "bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:482" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_483" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta" + input: "save/RestoreV2:483" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_484" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:484" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_485" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:485" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_486" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma" + input: "save/RestoreV2:486" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_487" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:487" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_488" + op: "Assign" + input: "bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:488" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_489" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias" + input: "save/RestoreV2:489" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_490" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_m" + input: "save/RestoreV2:490" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_491" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/bias/adam_v" + input: "save/RestoreV2:491" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_492" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel" + input: "save/RestoreV2:492" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_493" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_m" + input: "save/RestoreV2:493" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_494" + op: "Assign" + input: "bert/encoder/layer_7/output/dense/kernel/adam_v" + input: "save/RestoreV2:494" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_7/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_495" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta" + input: "save/RestoreV2:495" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_496" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:496" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_497" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:497" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_498" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:498" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_499" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:499" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_500" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:500" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_501" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias" + input: "save/RestoreV2:501" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_502" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:502" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_503" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:503" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_504" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel" + input: "save/RestoreV2:504" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_505" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:505" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_506" + op: "Assign" + input: "bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:506" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_507" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias" + input: "save/RestoreV2:507" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_508" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_m" + input: "save/RestoreV2:508" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_509" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/bias/adam_v" + input: "save/RestoreV2:509" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_510" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel" + input: "save/RestoreV2:510" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_511" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:511" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_512" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:512" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_513" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias" + input: "save/RestoreV2:513" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_514" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_m" + input: "save/RestoreV2:514" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_515" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/bias/adam_v" + input: "save/RestoreV2:515" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_516" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel" + input: "save/RestoreV2:516" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_517" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:517" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_518" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:518" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_519" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias" + input: "save/RestoreV2:519" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_520" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_m" + input: "save/RestoreV2:520" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_521" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/bias/adam_v" + input: "save/RestoreV2:521" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_522" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel" + input: "save/RestoreV2:522" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_523" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:523" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_524" + op: "Assign" + input: "bert/encoder/layer_8/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:524" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_525" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias" + input: "save/RestoreV2:525" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_526" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:526" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_527" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:527" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_528" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel" + input: "save/RestoreV2:528" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_529" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:529" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_530" + op: "Assign" + input: "bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:530" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_531" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta" + input: "save/RestoreV2:531" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_532" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:532" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_533" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:533" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_534" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma" + input: "save/RestoreV2:534" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_535" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:535" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_536" + op: "Assign" + input: "bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:536" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_537" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias" + input: "save/RestoreV2:537" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_538" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_m" + input: "save/RestoreV2:538" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_539" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/bias/adam_v" + input: "save/RestoreV2:539" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_540" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel" + input: "save/RestoreV2:540" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_541" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_m" + input: "save/RestoreV2:541" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_542" + op: "Assign" + input: "bert/encoder/layer_8/output/dense/kernel/adam_v" + input: "save/RestoreV2:542" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_8/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_543" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta" + input: "save/RestoreV2:543" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_544" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:544" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_545" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:545" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_546" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma" + input: "save/RestoreV2:546" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_547" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:547" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_548" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:548" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_549" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias" + input: "save/RestoreV2:549" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_550" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_m" + input: "save/RestoreV2:550" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_551" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/bias/adam_v" + input: "save/RestoreV2:551" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_552" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel" + input: "save/RestoreV2:552" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_553" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + input: "save/RestoreV2:553" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_554" + op: "Assign" + input: "bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + input: "save/RestoreV2:554" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_555" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias" + input: "save/RestoreV2:555" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_556" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_m" + input: "save/RestoreV2:556" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_557" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/bias/adam_v" + input: "save/RestoreV2:557" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_558" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel" + input: "save/RestoreV2:558" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_559" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_m" + input: "save/RestoreV2:559" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_560" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/key/kernel/adam_v" + input: "save/RestoreV2:560" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_561" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias" + input: "save/RestoreV2:561" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_562" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_m" + input: "save/RestoreV2:562" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_563" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/bias/adam_v" + input: "save/RestoreV2:563" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_564" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel" + input: "save/RestoreV2:564" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_565" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_m" + input: "save/RestoreV2:565" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_566" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/query/kernel/adam_v" + input: "save/RestoreV2:566" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_567" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias" + input: "save/RestoreV2:567" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_568" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_m" + input: "save/RestoreV2:568" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_569" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/bias/adam_v" + input: "save/RestoreV2:569" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_570" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel" + input: "save/RestoreV2:570" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_571" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_m" + input: "save/RestoreV2:571" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_572" + op: "Assign" + input: "bert/encoder/layer_9/attention/self/value/kernel/adam_v" + input: "save/RestoreV2:572" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/attention/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_573" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias" + input: "save/RestoreV2:573" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_574" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:574" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_575" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:575" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_576" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel" + input: "save/RestoreV2:576" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_577" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:577" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_578" + op: "Assign" + input: "bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:578" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_579" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta" + input: "save/RestoreV2:579" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_580" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + input: "save/RestoreV2:580" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_581" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + input: "save/RestoreV2:581" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_582" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma" + input: "save/RestoreV2:582" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_583" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:583" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_584" + op: "Assign" + input: "bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:584" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_585" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias" + input: "save/RestoreV2:585" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_586" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_m" + input: "save/RestoreV2:586" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_587" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/bias/adam_v" + input: "save/RestoreV2:587" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_588" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel" + input: "save/RestoreV2:588" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_589" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_m" + input: "save/RestoreV2:589" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_590" + op: "Assign" + input: "bert/encoder/layer_9/output/dense/kernel/adam_v" + input: "save/RestoreV2:590" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/layer_9/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_591" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "save/RestoreV2:591" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_592" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "save/RestoreV2:592" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_593" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "save/RestoreV2:593" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_594" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "save/RestoreV2:594" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_595" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "save/RestoreV2:595" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_596" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "save/RestoreV2:596" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_597" + op: "Assign" + input: "cls/predictions/output_bias" + input: "save/RestoreV2:597" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_598" + op: "Assign" + input: "cls/predictions/output_bias/adam_m" + input: "save/RestoreV2:598" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_599" + op: "Assign" + input: "cls/predictions/output_bias/adam_v" + input: "save/RestoreV2:599" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 28996 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_600" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta" + input: "save/RestoreV2:600" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_601" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "save/RestoreV2:601" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_602" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "save/RestoreV2:602" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_603" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "save/RestoreV2:603" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_604" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:604" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_605" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:605" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_606" + op: "Assign" + input: "cls/predictions/transform/dense/bias" + input: "save/RestoreV2:606" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_607" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "save/RestoreV2:607" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_608" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "save/RestoreV2:608" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_609" + op: "Assign" + input: "cls/predictions/transform/dense/kernel" + input: "save/RestoreV2:609" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_610" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "save/RestoreV2:610" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_611" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "save/RestoreV2:611" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_612" + op: "Assign" + input: "cls/seq_relationship/output_bias" + input: "save/RestoreV2:612" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_613" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_m" + input: "save/RestoreV2:613" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_614" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_v" + input: "save/RestoreV2:614" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_615" + op: "Assign" + input: "cls/seq_relationship/output_weights" + input: "save/RestoreV2:615" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_616" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_m" + input: "save/RestoreV2:616" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_617" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_v" + input: "save/RestoreV2:617" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Identity_1" + op: "Identity" + input: "save/RestoreV2:618" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } +} +node { + name: "save/AssignVariableOp" + op: "AssignVariableOp" + input: "global_step" + input: "save/Identity_1" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "save/restore_shard" + op: "NoOp" + input: "^save/Assign" + input: "^save/AssignVariableOp" + input: "^save/Assign_1" + input: "^save/Assign_10" + input: "^save/Assign_100" + input: "^save/Assign_101" + input: "^save/Assign_102" + input: "^save/Assign_103" + input: "^save/Assign_104" + input: "^save/Assign_105" + input: "^save/Assign_106" + input: "^save/Assign_107" + input: "^save/Assign_108" + input: "^save/Assign_109" + input: "^save/Assign_11" + input: "^save/Assign_110" + input: "^save/Assign_111" + input: "^save/Assign_112" + input: "^save/Assign_113" + input: "^save/Assign_114" + input: "^save/Assign_115" + input: "^save/Assign_116" + input: "^save/Assign_117" + input: "^save/Assign_118" + input: "^save/Assign_119" + input: "^save/Assign_12" + input: "^save/Assign_120" + input: "^save/Assign_121" + input: "^save/Assign_122" + input: "^save/Assign_123" + input: "^save/Assign_124" + input: "^save/Assign_125" + input: "^save/Assign_126" + input: "^save/Assign_127" + input: "^save/Assign_128" + input: "^save/Assign_129" + input: "^save/Assign_13" + input: "^save/Assign_130" + input: "^save/Assign_131" + input: "^save/Assign_132" + input: "^save/Assign_133" + input: "^save/Assign_134" + input: "^save/Assign_135" + input: "^save/Assign_136" + input: "^save/Assign_137" + input: "^save/Assign_138" + input: "^save/Assign_139" + input: "^save/Assign_14" + input: "^save/Assign_140" + input: "^save/Assign_141" + input: "^save/Assign_142" + input: "^save/Assign_143" + input: "^save/Assign_144" + input: "^save/Assign_145" + input: "^save/Assign_146" + input: "^save/Assign_147" + input: "^save/Assign_148" + input: "^save/Assign_149" + input: "^save/Assign_15" + input: "^save/Assign_150" + input: "^save/Assign_151" + input: "^save/Assign_152" + input: "^save/Assign_153" + input: "^save/Assign_154" + input: "^save/Assign_155" + input: "^save/Assign_156" + input: "^save/Assign_157" + input: "^save/Assign_158" + input: "^save/Assign_159" + input: "^save/Assign_16" + input: "^save/Assign_160" + input: "^save/Assign_161" + input: "^save/Assign_162" + input: "^save/Assign_163" + input: "^save/Assign_164" + input: "^save/Assign_165" + input: "^save/Assign_166" + input: "^save/Assign_167" + input: "^save/Assign_168" + input: "^save/Assign_169" + input: "^save/Assign_17" + input: "^save/Assign_170" + input: "^save/Assign_171" + input: "^save/Assign_172" + input: "^save/Assign_173" + input: "^save/Assign_174" + input: "^save/Assign_175" + input: "^save/Assign_176" + input: "^save/Assign_177" + input: "^save/Assign_178" + input: "^save/Assign_179" + input: "^save/Assign_18" + input: "^save/Assign_180" + input: "^save/Assign_181" + input: "^save/Assign_182" + input: "^save/Assign_183" + input: "^save/Assign_184" + input: "^save/Assign_185" + input: "^save/Assign_186" + input: "^save/Assign_187" + input: "^save/Assign_188" + input: "^save/Assign_189" + input: "^save/Assign_19" + input: "^save/Assign_190" + input: "^save/Assign_191" + input: "^save/Assign_192" + input: "^save/Assign_193" + input: "^save/Assign_194" + input: "^save/Assign_195" + input: "^save/Assign_196" + input: "^save/Assign_197" + input: "^save/Assign_198" + input: "^save/Assign_199" + input: "^save/Assign_2" + input: "^save/Assign_20" + input: "^save/Assign_200" + input: "^save/Assign_201" + input: "^save/Assign_202" + input: "^save/Assign_203" + input: "^save/Assign_204" + input: "^save/Assign_205" + input: "^save/Assign_206" + input: "^save/Assign_207" + input: "^save/Assign_208" + input: "^save/Assign_209" + input: "^save/Assign_21" + input: "^save/Assign_210" + input: "^save/Assign_211" + input: "^save/Assign_212" + input: "^save/Assign_213" + input: "^save/Assign_214" + input: "^save/Assign_215" + input: "^save/Assign_216" + input: "^save/Assign_217" + input: "^save/Assign_218" + input: "^save/Assign_219" + input: "^save/Assign_22" + input: "^save/Assign_220" + input: "^save/Assign_221" + input: "^save/Assign_222" + input: "^save/Assign_223" + input: "^save/Assign_224" + input: "^save/Assign_225" + input: "^save/Assign_226" + input: "^save/Assign_227" + input: "^save/Assign_228" + input: "^save/Assign_229" + input: "^save/Assign_23" + input: "^save/Assign_230" + input: "^save/Assign_231" + input: "^save/Assign_232" + input: "^save/Assign_233" + input: "^save/Assign_234" + input: "^save/Assign_235" + input: "^save/Assign_236" + input: "^save/Assign_237" + input: "^save/Assign_238" + input: "^save/Assign_239" + input: "^save/Assign_24" + input: "^save/Assign_240" + input: "^save/Assign_241" + input: "^save/Assign_242" + input: "^save/Assign_243" + input: "^save/Assign_244" + input: "^save/Assign_245" + input: "^save/Assign_246" + input: "^save/Assign_247" + input: "^save/Assign_248" + input: "^save/Assign_249" + input: "^save/Assign_25" + input: "^save/Assign_250" + input: "^save/Assign_251" + input: "^save/Assign_252" + input: "^save/Assign_253" + input: "^save/Assign_254" + input: "^save/Assign_255" + input: "^save/Assign_256" + input: "^save/Assign_257" + input: "^save/Assign_258" + input: "^save/Assign_259" + input: "^save/Assign_26" + input: "^save/Assign_260" + input: "^save/Assign_261" + input: "^save/Assign_262" + input: "^save/Assign_263" + input: "^save/Assign_264" + input: "^save/Assign_265" + input: "^save/Assign_266" + input: "^save/Assign_267" + input: "^save/Assign_268" + input: "^save/Assign_269" + input: "^save/Assign_27" + input: "^save/Assign_270" + input: "^save/Assign_271" + input: "^save/Assign_272" + input: "^save/Assign_273" + input: "^save/Assign_274" + input: "^save/Assign_275" + input: "^save/Assign_276" + input: "^save/Assign_277" + input: "^save/Assign_278" + input: "^save/Assign_279" + input: "^save/Assign_28" + input: "^save/Assign_280" + input: "^save/Assign_281" + input: "^save/Assign_282" + input: "^save/Assign_283" + input: "^save/Assign_284" + input: "^save/Assign_285" + input: "^save/Assign_286" + input: "^save/Assign_287" + input: "^save/Assign_288" + input: "^save/Assign_289" + input: "^save/Assign_29" + input: "^save/Assign_290" + input: "^save/Assign_291" + input: "^save/Assign_292" + input: "^save/Assign_293" + input: "^save/Assign_294" + input: "^save/Assign_295" + input: "^save/Assign_296" + input: "^save/Assign_297" + input: "^save/Assign_298" + input: "^save/Assign_299" + input: "^save/Assign_3" + input: "^save/Assign_30" + input: "^save/Assign_300" + input: "^save/Assign_301" + input: "^save/Assign_302" + input: "^save/Assign_303" + input: "^save/Assign_304" + input: "^save/Assign_305" + input: "^save/Assign_306" + input: "^save/Assign_307" + input: "^save/Assign_308" + input: "^save/Assign_309" + input: "^save/Assign_31" + input: "^save/Assign_310" + input: "^save/Assign_311" + input: "^save/Assign_312" + input: "^save/Assign_313" + input: "^save/Assign_314" + input: "^save/Assign_315" + input: "^save/Assign_316" + input: "^save/Assign_317" + input: "^save/Assign_318" + input: "^save/Assign_319" + input: "^save/Assign_32" + input: "^save/Assign_320" + input: "^save/Assign_321" + input: "^save/Assign_322" + input: "^save/Assign_323" + input: "^save/Assign_324" + input: "^save/Assign_325" + input: "^save/Assign_326" + input: "^save/Assign_327" + input: "^save/Assign_328" + input: "^save/Assign_329" + input: "^save/Assign_33" + input: "^save/Assign_330" + input: "^save/Assign_331" + input: "^save/Assign_332" + input: "^save/Assign_333" + input: "^save/Assign_334" + input: "^save/Assign_335" + input: "^save/Assign_336" + input: "^save/Assign_337" + input: "^save/Assign_338" + input: "^save/Assign_339" + input: "^save/Assign_34" + input: "^save/Assign_340" + input: "^save/Assign_341" + input: "^save/Assign_342" + input: "^save/Assign_343" + input: "^save/Assign_344" + input: "^save/Assign_345" + input: "^save/Assign_346" + input: "^save/Assign_347" + input: "^save/Assign_348" + input: "^save/Assign_349" + input: "^save/Assign_35" + input: "^save/Assign_350" + input: "^save/Assign_351" + input: "^save/Assign_352" + input: "^save/Assign_353" + input: "^save/Assign_354" + input: "^save/Assign_355" + input: "^save/Assign_356" + input: "^save/Assign_357" + input: "^save/Assign_358" + input: "^save/Assign_359" + input: "^save/Assign_36" + input: "^save/Assign_360" + input: "^save/Assign_361" + input: "^save/Assign_362" + input: "^save/Assign_363" + input: "^save/Assign_364" + input: "^save/Assign_365" + input: "^save/Assign_366" + input: "^save/Assign_367" + input: "^save/Assign_368" + input: "^save/Assign_369" + input: "^save/Assign_37" + input: "^save/Assign_370" + input: "^save/Assign_371" + input: "^save/Assign_372" + input: "^save/Assign_373" + input: "^save/Assign_374" + input: "^save/Assign_375" + input: "^save/Assign_376" + input: "^save/Assign_377" + input: "^save/Assign_378" + input: "^save/Assign_379" + input: "^save/Assign_38" + input: "^save/Assign_380" + input: "^save/Assign_381" + input: "^save/Assign_382" + input: "^save/Assign_383" + input: "^save/Assign_384" + input: "^save/Assign_385" + input: "^save/Assign_386" + input: "^save/Assign_387" + input: "^save/Assign_388" + input: "^save/Assign_389" + input: "^save/Assign_39" + input: "^save/Assign_390" + input: "^save/Assign_391" + input: "^save/Assign_392" + input: "^save/Assign_393" + input: "^save/Assign_394" + input: "^save/Assign_395" + input: "^save/Assign_396" + input: "^save/Assign_397" + input: "^save/Assign_398" + input: "^save/Assign_399" + input: "^save/Assign_4" + input: "^save/Assign_40" + input: "^save/Assign_400" + input: "^save/Assign_401" + input: "^save/Assign_402" + input: "^save/Assign_403" + input: "^save/Assign_404" + input: "^save/Assign_405" + input: "^save/Assign_406" + input: "^save/Assign_407" + input: "^save/Assign_408" + input: "^save/Assign_409" + input: "^save/Assign_41" + input: "^save/Assign_410" + input: "^save/Assign_411" + input: "^save/Assign_412" + input: "^save/Assign_413" + input: "^save/Assign_414" + input: "^save/Assign_415" + input: "^save/Assign_416" + input: "^save/Assign_417" + input: "^save/Assign_418" + input: "^save/Assign_419" + input: "^save/Assign_42" + input: "^save/Assign_420" + input: "^save/Assign_421" + input: "^save/Assign_422" + input: "^save/Assign_423" + input: "^save/Assign_424" + input: "^save/Assign_425" + input: "^save/Assign_426" + input: "^save/Assign_427" + input: "^save/Assign_428" + input: "^save/Assign_429" + input: "^save/Assign_43" + input: "^save/Assign_430" + input: "^save/Assign_431" + input: "^save/Assign_432" + input: "^save/Assign_433" + input: "^save/Assign_434" + input: "^save/Assign_435" + input: "^save/Assign_436" + input: "^save/Assign_437" + input: "^save/Assign_438" + input: "^save/Assign_439" + input: "^save/Assign_44" + input: "^save/Assign_440" + input: "^save/Assign_441" + input: "^save/Assign_442" + input: "^save/Assign_443" + input: "^save/Assign_444" + input: "^save/Assign_445" + input: "^save/Assign_446" + input: "^save/Assign_447" + input: "^save/Assign_448" + input: "^save/Assign_449" + input: "^save/Assign_45" + input: "^save/Assign_450" + input: "^save/Assign_451" + input: "^save/Assign_452" + input: "^save/Assign_453" + input: "^save/Assign_454" + input: "^save/Assign_455" + input: "^save/Assign_456" + input: "^save/Assign_457" + input: "^save/Assign_458" + input: "^save/Assign_459" + input: "^save/Assign_46" + input: "^save/Assign_460" + input: "^save/Assign_461" + input: "^save/Assign_462" + input: "^save/Assign_463" + input: "^save/Assign_464" + input: "^save/Assign_465" + input: "^save/Assign_466" + input: "^save/Assign_467" + input: "^save/Assign_468" + input: "^save/Assign_469" + input: "^save/Assign_47" + input: "^save/Assign_470" + input: "^save/Assign_471" + input: "^save/Assign_472" + input: "^save/Assign_473" + input: "^save/Assign_474" + input: "^save/Assign_475" + input: "^save/Assign_476" + input: "^save/Assign_477" + input: "^save/Assign_478" + input: "^save/Assign_479" + input: "^save/Assign_48" + input: "^save/Assign_480" + input: "^save/Assign_481" + input: "^save/Assign_482" + input: "^save/Assign_483" + input: "^save/Assign_484" + input: "^save/Assign_485" + input: "^save/Assign_486" + input: "^save/Assign_487" + input: "^save/Assign_488" + input: "^save/Assign_489" + input: "^save/Assign_49" + input: "^save/Assign_490" + input: "^save/Assign_491" + input: "^save/Assign_492" + input: "^save/Assign_493" + input: "^save/Assign_494" + input: "^save/Assign_495" + input: "^save/Assign_496" + input: "^save/Assign_497" + input: "^save/Assign_498" + input: "^save/Assign_499" + input: "^save/Assign_5" + input: "^save/Assign_50" + input: "^save/Assign_500" + input: "^save/Assign_501" + input: "^save/Assign_502" + input: "^save/Assign_503" + input: "^save/Assign_504" + input: "^save/Assign_505" + input: "^save/Assign_506" + input: "^save/Assign_507" + input: "^save/Assign_508" + input: "^save/Assign_509" + input: "^save/Assign_51" + input: "^save/Assign_510" + input: "^save/Assign_511" + input: "^save/Assign_512" + input: "^save/Assign_513" + input: "^save/Assign_514" + input: "^save/Assign_515" + input: "^save/Assign_516" + input: "^save/Assign_517" + input: "^save/Assign_518" + input: "^save/Assign_519" + input: "^save/Assign_52" + input: "^save/Assign_520" + input: "^save/Assign_521" + input: "^save/Assign_522" + input: "^save/Assign_523" + input: "^save/Assign_524" + input: "^save/Assign_525" + input: "^save/Assign_526" + input: "^save/Assign_527" + input: "^save/Assign_528" + input: "^save/Assign_529" + input: "^save/Assign_53" + input: "^save/Assign_530" + input: "^save/Assign_531" + input: "^save/Assign_532" + input: "^save/Assign_533" + input: "^save/Assign_534" + input: "^save/Assign_535" + input: "^save/Assign_536" + input: "^save/Assign_537" + input: "^save/Assign_538" + input: "^save/Assign_539" + input: "^save/Assign_54" + input: "^save/Assign_540" + input: "^save/Assign_541" + input: "^save/Assign_542" + input: "^save/Assign_543" + input: "^save/Assign_544" + input: "^save/Assign_545" + input: "^save/Assign_546" + input: "^save/Assign_547" + input: "^save/Assign_548" + input: "^save/Assign_549" + input: "^save/Assign_55" + input: "^save/Assign_550" + input: "^save/Assign_551" + input: "^save/Assign_552" + input: "^save/Assign_553" + input: "^save/Assign_554" + input: "^save/Assign_555" + input: "^save/Assign_556" + input: "^save/Assign_557" + input: "^save/Assign_558" + input: "^save/Assign_559" + input: "^save/Assign_56" + input: "^save/Assign_560" + input: "^save/Assign_561" + input: "^save/Assign_562" + input: "^save/Assign_563" + input: "^save/Assign_564" + input: "^save/Assign_565" + input: "^save/Assign_566" + input: "^save/Assign_567" + input: "^save/Assign_568" + input: "^save/Assign_569" + input: "^save/Assign_57" + input: "^save/Assign_570" + input: "^save/Assign_571" + input: "^save/Assign_572" + input: "^save/Assign_573" + input: "^save/Assign_574" + input: "^save/Assign_575" + input: "^save/Assign_576" + input: "^save/Assign_577" + input: "^save/Assign_578" + input: "^save/Assign_579" + input: "^save/Assign_58" + input: "^save/Assign_580" + input: "^save/Assign_581" + input: "^save/Assign_582" + input: "^save/Assign_583" + input: "^save/Assign_584" + input: "^save/Assign_585" + input: "^save/Assign_586" + input: "^save/Assign_587" + input: "^save/Assign_588" + input: "^save/Assign_589" + input: "^save/Assign_59" + input: "^save/Assign_590" + input: "^save/Assign_591" + input: "^save/Assign_592" + input: "^save/Assign_593" + input: "^save/Assign_594" + input: "^save/Assign_595" + input: "^save/Assign_596" + input: "^save/Assign_597" + input: "^save/Assign_598" + input: "^save/Assign_599" + input: "^save/Assign_6" + input: "^save/Assign_60" + input: "^save/Assign_600" + input: "^save/Assign_601" + input: "^save/Assign_602" + input: "^save/Assign_603" + input: "^save/Assign_604" + input: "^save/Assign_605" + input: "^save/Assign_606" + input: "^save/Assign_607" + input: "^save/Assign_608" + input: "^save/Assign_609" + input: "^save/Assign_61" + input: "^save/Assign_610" + input: "^save/Assign_611" + input: "^save/Assign_612" + input: "^save/Assign_613" + input: "^save/Assign_614" + input: "^save/Assign_615" + input: "^save/Assign_616" + input: "^save/Assign_617" + input: "^save/Assign_62" + input: "^save/Assign_63" + input: "^save/Assign_64" + input: "^save/Assign_65" + input: "^save/Assign_66" + input: "^save/Assign_67" + input: "^save/Assign_68" + input: "^save/Assign_69" + input: "^save/Assign_7" + input: "^save/Assign_70" + input: "^save/Assign_71" + input: "^save/Assign_72" + input: "^save/Assign_73" + input: "^save/Assign_74" + input: "^save/Assign_75" + input: "^save/Assign_76" + input: "^save/Assign_77" + input: "^save/Assign_78" + input: "^save/Assign_79" + input: "^save/Assign_8" + input: "^save/Assign_80" + input: "^save/Assign_81" + input: "^save/Assign_82" + input: "^save/Assign_83" + input: "^save/Assign_84" + input: "^save/Assign_85" + input: "^save/Assign_86" + input: "^save/Assign_87" + input: "^save/Assign_88" + input: "^save/Assign_89" + input: "^save/Assign_9" + input: "^save/Assign_90" + input: "^save/Assign_91" + input: "^save/Assign_92" + input: "^save/Assign_93" + input: "^save/Assign_94" + input: "^save/Assign_95" + input: "^save/Assign_96" + input: "^save/Assign_97" + input: "^save/Assign_98" + input: "^save/Assign_99" +} +node { + name: "save/restore_all" + op: "NoOp" + input: "^save/restore_shard" +} +library { + function { + signature { + name: "tf_data_structured_function_wrapper_9z4XDFOWUdQ" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "toint32" + type: DT_INT32 + } + output_arg { + name: "toint32_1" + type: DT_INT32 + } + output_arg { + name: "toint32_2" + type: DT_INT32 + } + output_arg { + name: "toint32_3" + type: DT_INT32 + } + output_arg { + name: "parsesingleexample_parsesingleexample" + type: DT_FLOAT + } + output_arg { + name: "toint32_4" + type: DT_INT32 + } + output_arg { + name: "toint32_5" + type: DT_INT32 + } + description: "Wrapper for passing nested structures to and from tf.data functions." + } + node_def { + name: "ParseSingleExample/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_3" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_4" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_5" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_6" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/ParseSingleExample" + op: "ParseSingleExample" + input: "arg0" + input: "ParseSingleExample/Const:output:0" + input: "ParseSingleExample/Const_1:output:0" + input: "ParseSingleExample/Const_2:output:0" + input: "ParseSingleExample/Const_3:output:0" + input: "ParseSingleExample/Const_4:output:0" + input: "ParseSingleExample/Const_5:output:0" + input: "ParseSingleExample/Const_6:output:0" + device: "/device:CPU:0" + attr { + key: "Tdense" + value { + list { + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + } + } + } + attr { + key: "dense_keys" + value { + list { + s: "input_ids" + s: "input_mask" + s: "masked_lm_ids" + s: "masked_lm_positions" + s: "masked_lm_weights" + s: "next_sentence_labels" + s: "segment_ids" + } + } + } + attr { + key: "dense_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + shape { + dim { + size: 128 + } + } + shape { + dim { + size: 20 + } + } + shape { + dim { + size: 20 + } + } + shape { + dim { + size: 20 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "num_sparse" + value { + i: 0 + } + } + attr { + key: "sparse_keys" + value { + list { + } + } + } + attr { + key: "sparse_types" + value { + list { + } + } + } + } + node_def { + name: "ToInt32" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:0" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_1" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:1" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_2" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:2" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_3" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:3" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_4" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:5" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_5" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:6" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + ret { + key: "parsesingleexample_parsesingleexample" + value: "ParseSingleExample/ParseSingleExample:dense_values:4" + } + ret { + key: "toint32" + value: "ToInt32:y:0" + } + ret { + key: "toint32_1" + value: "ToInt32_1:y:0" + } + ret { + key: "toint32_2" + value: "ToInt32_2:y:0" + } + ret { + key: "toint32_3" + value: "ToInt32_3:y:0" + } + ret { + key: "toint32_4" + value: "ToInt32_4:y:0" + } + ret { + key: "toint32_5" + value: "ToInt32_5:y:0" + } + } + function { + signature { + name: "tf_data_structured_function_wrapper_52oRgbbAY9U" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "tfrecorddataset" + type: DT_VARIANT + } + description: "Wrapper for passing nested structures to and from tf.data functions." + is_stateful: true + } + node_def { + name: "compression_type" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } + } + node_def { + name: "buffer_size" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 262144 + } + } + } + } + node_def { + name: "TFRecordDataset" + op: "TFRecordDataset" + input: "arg0" + input: "compression_type:output:0" + input: "buffer_size:output:0" + device: "/device:CPU:0" + } + ret { + key: "tfrecorddataset" + value: "TFRecordDataset:handle:0" + } + } + function { + signature { + name: "tf_data_structured_function_wrapper_rn3e6kArW78" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "flatmapdataset" + type: DT_VARIANT + } + description: "Wrapper for passing nested structures to and from tf.data functions." + is_stateful: true + } + node_def { + name: "flat_filenames/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } + } + node_def { + name: "flat_filenames" + op: "Reshape" + input: "arg0" + input: "flat_filenames/shape:output:0" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + } + node_def { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "flat_filenames:output:0" + device: "/device:CPU:0" + attr { + key: "Toutput_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "FlatMapDataset" + op: "FlatMapDataset" + input: "TensorSliceDataset:handle:0" + device: "/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "f" + value { + func { + name: "tf_data_structured_function_wrapper_52oRgbbAY9U" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + } + ret { + key: "flatmapdataset" + value: "FlatMapDataset:handle:0" + } + } +} +versions { + producer: 27 + min_consumer: 12 +}