diff --git a/.gitattributes b/.gitattributes
index ac481c8eb05e4d2496fbe076a38a7b4835dd733d..47dd0ca34dac13c04ef83179349b61c466bd2cb8 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,6 +1,7 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
+*.bin.* filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
@@ -25,3 +26,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint*/** filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ba0430d26c996e7f078385407f959c96c271087c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
\ No newline at end of file
diff --git a/base_pretrain.gin b/base_pretrain.gin
new file mode 100644
index 0000000000000000000000000000000000000000..dc396e0a00454da26f77c9cf8814879030982ae6
--- /dev/null
+++ b/base_pretrain.gin
@@ -0,0 +1,24 @@
+# Register necessary SeqIO Tasks/Mixtures.
+from __gin__ import dynamic_registration
+from t5x import utils
+import tasks
+import __main__ as train_script
+
+include 't5x/examples/t5/byt5/base.gin'
+include 't5x/configs/runs/pretrain.gin'
+
+
+# ------------------- Training specification overrides --------------------------
+train_script.train:
+  eval_period = 10000
+
+utils.SaveCheckpointConfig:
+  period = 10000
+  keep = 10
+
+MIXTURE_OR_TASK_NAME = "byt5_pretrain_finnish"
+USE_CACHED_TASKS = False
+TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 512}
+TRAIN_STEPS = 1000000
+DROPOUT_RATE = 0.0
+BATCH_SIZE = 256
\ No newline at end of file
diff --git a/checkpoint_10000/checkpoint b/checkpoint_10000/checkpoint
new file mode 100644
index 0000000000000000000000000000000000000000..6a706c637582420c91d376a916a17c8f1738da7c
--- /dev/null
+++ b/checkpoint_10000/checkpoint
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f34bdb8f96c3559b2aa10df0336250ae19e3270a0e5f830a682a13469f252bb
+size 2792047
diff --git a/checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/.zarray
new file mode 100644
index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e
--- /dev/null
+++ b/checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/.zarray
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
+size 170
diff --git a/checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/0
new file mode 100644
index 0000000000000000000000000000000000000000..4191726dd5e46b2792be3df36f9cf9fd526bb0f6
--- /dev/null
+++ b/checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/0
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b02e30ad784125e9ecc40ddf27786a86edbfb3bc8f46a51f668ee3b5c5b7af5
+size 5546
diff --git a/checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray
new file mode 100644
index
0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..8d35084d42857008cd5528ba9251b6190b4f29d1 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49d5808306f7001c8056a954c8a196fa69ec63ef873f816d8b8eae1f71fb898 +size 5540 diff --git a/checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..8cf13359072b68e587dce2f7a2373ac3f1f2e7d4 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d250f8cd70a199e4797f733a3b469a7e7eb0110b7c31835f2421db9dc3ecd3b7 +size 5471 diff --git a/checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..d2e04d7d1a448d681fc1449af0ec01f019cb9074 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139c78ca54e195479efa3da47e9e2a76d57e7e1d833cd7eec1cc0fee412242da +size 5591 diff --git a/checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ 
b/checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..aa1683e60f3d7faf12fe9906ffe49046c5e95dd1 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e7f913939f945cd1baf3a05ec2b26f2e69f0f24bd0e25c32a881aaa65d727f +size 5502 diff --git a/checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..ebf7b602eb53d88b2ccbf556899952017a5ca9be --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3acae52d1597753f9b2f366627567acff7de0aaee8620109d0b73bbdfc4d789 +size 5473 diff --git a/checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..4bf283ac0698304ce6469a32799a7455cf3c0b6b --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dca47bf50d898efde5d9b10fdb8b83b008ca24c693098c0cf26609f7b86d19c2 +size 5555 diff --git a/checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..aa953fd53303537712a11f4e7643ad3681d0c093 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb56573b3bc10b358a676e511073f579b80d0d8563c015dd0ff8927ffcfb23b +size 5485 diff --git a/checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..0bb62d7519cb3a493f1eb0fcf4c2307427d07aef --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a86495308631fe5372d96b05d2a3f9f2a860402ec111b4ad96322bd353df94b +size 5495 diff --git a/checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..a3bd2c6474ad7532f77799db7d2acd66ed8a47a8 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa259d85404d2c6348dab818abfd2d22a87153fb0197bb42fda9d8b0e934850 +size 5536 diff --git a/checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff 
--git a/checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..022c06a74750c69d86b2728483955cee7ebfd488 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a89715c7324274863f7983995ff8945c639c2be07648214651796e987c2815a +size 5506 diff --git a/checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..61dd9bfe3d7d4a4cd3055503171acb8a9a062ef8 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c5a77f7158105215a9509ba57a0369b033e11f752c902d5b77bd5d1beaaeac +size 5503 diff --git a/checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..bb30fa55db4c2f4dfb19a4ce0172521ae1f9dca1 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:162c30ff3d061438785dac40e5d6cb8a6e3f844a2616d2049719086ff42181f5 +size 5547 diff --git a/checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 
b/checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..48e5ee5a11d22a362f7f5e85e91f7556b0582337 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0613f2003d094908c3faf2875b5ca9680991ab912772224f169d0701b6278685 +size 5519 diff --git a/checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..33cbf7d3214905330499e13f4dd544b208168f3d --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ab5fa4ac6b702e9e97e14f437c7e52b832b2706a73467d1c40db33996b84be +size 5570 diff --git a/checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..514a666bba7d64836d20ef82ea428d87681b3ca0 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b19d46610a38c8edbb56dd6263a1798dff8027238c4a16e8e6983b3033d929 +size 5576 diff --git a/checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 new file mode 100644 index 
0000000000000000000000000000000000000000..1245262220dffbf47ccf5ae334f43aab4d4cc99c --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a517e404b1a6df3abe86ac685c1b4de20f848ffdd10bb3b12a57c61f7b0946c +size 5519 diff --git a/checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..b7603371dd8b94ea30e3d687179251553c681226 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6cf26d2e6e6708e7e2c13c6f84a57acbb46fbbd3493b2df94e563c3637ecf84 +size 5549 diff --git a/checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..5c0a2d3687e83b383405b43c0c9b0bff4fe53ef4 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeaf0ab88e801024c13a0052c0c3e8073369c859e28a72dca19868806dbd6005 +size 5665 diff --git a/checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray b/checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f0ebb438b0df77a995f437c9cf19936cd0a38f79 --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7028b3f2007ef72746d64487314ca6723cab7f9713ecb30bd30b73a22418e831 +size 172 diff --git a/checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 b/checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c2c09e15c6ce7620b8d045d2fd4450a8b87be48b --- /dev/null +++ b/checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:65284ef7b0de15a72da6fcca4a522092b8d4301a5aafea385486e85988ad0b85 +size 1445 diff --git a/checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..c762822c0dcc71724892a4cf5de426c76eaf94ea --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b58a73db9acb2ae71f77eb4c6753399fed91531c45ee86df63123339242683a +size 5543 diff --git a/checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..43a523cdb0e594542663c2cdf33570c8dc9acfb0 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eac16a2916d7d8082fa87fc236150b4cf77a5f8e4c8802cd530015cabdf199b +size 5614 diff --git a/checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..34272c943c3a703e2ac3cc870fe6a793a62c5f33 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149e9f6c7a89621bdf03c0abc8ef305ae9ac74d6a9408d3ee8b0ce8c0971314e +size 5437 diff --git a/checkpoint_10000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray 
b/checkpoint_10000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..48be5510bd8dc175f362daae70357620073a35b1 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7514596b929e280e44993615e87a161285ef7d5a4e746842a3290580fbdb8efe +size 5490 diff --git a/checkpoint_10000/state.param_states.encoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_1.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_1.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..82f24fbda339e423b24cebae52b14a2db326c612 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_1.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf91a7e5fc4e684460d87dc80448c1fec2bf6d3562c2af9b36331d3fb144808f +size 5488 diff --git a/checkpoint_10000/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..84c2baad874c09b5427f57c113ddae3ce2335ee2 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_10.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac37b408a2b34ce0f72dfb402b7f0070030e3d6f989edaf0dd67471607eeb9e +size 5504 diff --git a/checkpoint_10000/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..2257fd554cea56ad231e5f902367ce9b887f3cf2 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_10.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f211006d4dc42ebcd75f4364f3c33e7931da536c1121df27986508be770dc0d +size 5433 diff --git a/checkpoint_10000/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..0fea489f79483d0d77f63eb74767ad45b5ec548a --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_11.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bebac51b85b3897a6a9d90c5398f6fd6f2f438f7194253d92b4f65b62d50c84f +size 5522 diff --git a/checkpoint_10000/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..76aee2aaac42a71dade62c61281b1592ef737a51 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_11.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79713d2b26ec99244b8a035459f50c8ee97f74969297cbdf6adf24d7432b6afd +size 5457 diff --git a/checkpoint_10000/state.param_states.encoder.layers_12.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_12.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_12.pre_attention_layer_norm.scale.v/.zarray @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_12.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_12.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..d777a2f676f3837d60924db6096ab9aaa97f5cbd --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_12.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe2bbaf6f5ae73ce0c5d59a183d36ec373e9a197a161f14629cded99c56ffab +size 5496 diff --git a/checkpoint_10000/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..4cc20d5f31d8c9a1a9573bff90da53fa64a62faa --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_12.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b0ed3d61102ca676229b5f6884b04aafffcaacbc5a48056fcf96e55e693a9c +size 5461 diff --git a/checkpoint_10000/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..52474385d364c5260cc8fe9f42953d8a885fe13d --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_13.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0585e98ec22a4085c1a36cb5b04f12d5307ade3b458fbee818425c6c9b5eec8a +size 5548 diff --git a/checkpoint_10000/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git 
a/checkpoint_10000/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..3783944fabac89ac220c591281b619298cb2f47f --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_13.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3314ae269ac28bd9be61cf8e9507ca440796990742a6f91c555e69d2c9475c3 +size 5487 diff --git a/checkpoint_10000/state.param_states.encoder.layers_14.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_14.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_14.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_14.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_14.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..fee33aeccc82c3816ccaaf3ac03a0dd20e876951 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_14.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3c28169c90c920c7c539d7435327a8b9d89740c6929277f752e41006383acb6 +size 5542 diff --git a/checkpoint_10000/state.param_states.encoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_14.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_14.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_14.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..93b0f2d7387947d75a5062c24410790342937ce9 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_14.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d38b74bbc19a7bbef8328f9cd3a3f246d4d7eccc7a618b5bf8fe02bcfb749d8 +size 5485 diff --git a/checkpoint_10000/state.param_states.encoder.layers_15.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_15.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_15.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_15.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_15.pre_attention_layer_norm.scale.v/0 new file mode 
100644 index 0000000000000000000000000000000000000000..46de8b8ce0040c9501054d504e6b7dd82bb9c26e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_15.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d08c315b49f83acc6dca541133adc0738f168e2588bff6204e3566e267e67d6 +size 5508 diff --git a/checkpoint_10000/state.param_states.encoder.layers_15.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_15.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_15.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_15.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_15.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..b78c59870e0a1e7714d9996ef3f2a33d6e4757a0 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_15.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dfaf451163158a04cf01df8fd872a5d1328d77edfa32de40cba9e0ece50436b +size 5521 diff --git a/checkpoint_10000/state.param_states.encoder.layers_16.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_16.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_16.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_16.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_16.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..91dccc18378cbc07d90e0bae07fea1414f4f238f --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_16.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca122c04fb7f915162519978ee20e22496761562455ae0f41653e25f10d4e022 +size 5508 diff --git a/checkpoint_10000/state.param_states.encoder.layers_16.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_16.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_16.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_16.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_16.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..89dce4d65a8f3c21ebc7b202afeea7f0b78957ff --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_16.pre_mlp_layer_norm.scale.v/0 @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c4b5aeddcb362792e06f6675e7886020ba9f488369fb93921a464bb9b942367 +size 5551 diff --git a/checkpoint_10000/state.param_states.encoder.layers_17.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_17.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_17.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_17.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_17.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..f64c3fd4e841bc26345075ea86cc5032be7e5067 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_17.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051dfb469cd3a70268493d0e601ce1d32cc22ea88d0f8236ec571891fa1468a2 +size 5522 diff --git a/checkpoint_10000/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..efda6f7bd1117e32a149061ccc5ac441a9fef5a7 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_17.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be4a1290669c7c7c6de871f50c2f5f11482a9af78ef210a16bbafa9727ee5a8 +size 5575 diff --git a/checkpoint_10000/state.param_states.encoder.layers_2.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_2.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_2.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_2.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_2.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..cef662f2f068bfbab05b634e7cb0ba64fecf2e96 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_2.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ba7b25bff70f88df8034d3807ac703798eddeae57b7d9a91db0a7a5ec21f28 +size 5490 diff --git 
a/checkpoint_10000/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..b1181de74d897a30e8c06f86dbf4ad2ce0a92ee1 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_2.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29e743ae2e804cd32ba8f7c7470286d9c04607759fe88202d757cb7328f7cb1 +size 5468 diff --git a/checkpoint_10000/state.param_states.encoder.layers_3.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_3.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_3.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_3.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_3.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..854037e349ac51612e220c4b8559cf98d40cc300 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_3.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157e79cdd615adc8a0d8f0a5fd8091081543335037e28d65e4bee932708cdc3c +size 5498 diff --git a/checkpoint_10000/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..068f8324391e2a820fd46ebce02c618d762ceb51 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_3.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b59135fb3fcbe3a6e324fbf1ebbf2317535f48f606bf55be753915789a391d58 +size 5470 diff --git a/checkpoint_10000/state.param_states.encoder.layers_4.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_4.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_4.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_4.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_4.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..c1cc0b528f0625ae2b2fddccf0da2693b837f9b0 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_4.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c2221e4822c944e21160f8ceb9b3e18e9a351e48c653669e0c0036241fc14c +size 5506 diff --git a/checkpoint_10000/state.param_states.encoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_4.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_4.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..6250ddf31dbe8904bf62289459a00df2485fec18 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_4.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e5e26db55e7fd239e61eeddfdded8ddaa516e108c90dba81ecf10986fb710b6 +size 5487 diff --git a/checkpoint_10000/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..26356287d6496d6bcf4445bbefee18ad29cde5e9 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_5.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd04675c07cc0ab67d31303b726316deddc6c8b8dbc2a9c51571f3938d2e2116 +size 5487 diff --git a/checkpoint_10000/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..92f746eeba9f5ec56801906a611b0f7b90c3d61d --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_5.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce360a86398fd4f4b904423aab04d951795e14dd1212dc181e9c3b5b1d707569 +size 5476 diff --git a/checkpoint_10000/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..e18e965cfe072872e07a9e39bc1a80bca0c4e1aa --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_6.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb843f4ada11b86634cfc4f46298fc2f5f6e25b698d1a9be668b3937ca90f5a1 +size 5509 diff --git a/checkpoint_10000/state.param_states.encoder.layers_6.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_6.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_6.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_6.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_6.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..34e55750497c1fd553c100e7ba2c4a4ee334c8bd --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_6.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d1b1ae448ac8d5dcec36848ad3a7d565aab26d65a9ded60eb7011380c4f546 +size 5487 diff --git a/checkpoint_10000/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git 
a/checkpoint_10000/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..b39982432e266c1f88de3c1b4e5a61a3b52eb7df --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_7.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe123ed90be42e9869aad5aa3d0775deb50d7a0df836408c4ab101c9bc496e0b +size 5513 diff --git a/checkpoint_10000/state.param_states.encoder.layers_7.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_7.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_7.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_7.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_7.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..6dab74e1e0a51b9f2b1582a975d151ce2cb50070 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_7.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29da432d9f21a9b44fa84e64289e20055878857443df6538b5e07ca418aabe0a +size 5478 diff --git a/checkpoint_10000/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..6ce991d493a65e19d005c8c5b180444459940df8 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_8.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e3ab3b9f33a442949934082bc811e25e573bd8d4a00aa8f70923ab68e9f791 +size 5487 diff --git a/checkpoint_10000/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 
0000000000000000000000000000000000000000..8f1651b7119bb259bb617223cec440840a3da593 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_8.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2003f14ce6341d6fb6d4a9437e3bbac6d5abd2c3dabb79db743cbca0ca687ad +size 5441 diff --git a/checkpoint_10000/state.param_states.encoder.layers_9.pre_attention_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_9.pre_attention_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_9.pre_attention_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_9.pre_attention_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_9.pre_attention_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..178f62fb92037f164093798a595add33d71a476b --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_9.pre_attention_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f666f479593f8112d8ca82672db18431459b2dfb49ab69917051a5fd6a3d9c32 +size 5501 diff --git a/checkpoint_10000/state.param_states.encoder.layers_9.pre_mlp_layer_norm.scale.v/.zarray b/checkpoint_10000/state.param_states.encoder.layers_9.pre_mlp_layer_norm.scale.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_9.pre_mlp_layer_norm.scale.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/state.param_states.encoder.layers_9.pre_mlp_layer_norm.scale.v/0 b/checkpoint_10000/state.param_states.encoder.layers_9.pre_mlp_layer_norm.scale.v/0 new file mode 100644 index 0000000000000000000000000000000000000000..dc16431d0a437668b086139fe2164853ba55cdbe --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.layers_9.pre_mlp_layer_norm.scale.v/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831dd34477feda51bbe7f786194a531fbbc100d156be864adfa6cba0415807c0 +size 5415 diff --git a/checkpoint_10000/state.param_states.encoder.relpos_bias.rel_embedding.v/.zarray b/checkpoint_10000/state.param_states.encoder.relpos_bias.rel_embedding.v/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f0ebb438b0df77a995f437c9cf19936cd0a38f79 --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.relpos_bias.rel_embedding.v/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7028b3f2007ef72746d64487314ca6723cab7f9713ecb30bd30b73a22418e831 +size 172 diff --git a/checkpoint_10000/state.param_states.encoder.relpos_bias.rel_embedding.v/0.0 b/checkpoint_10000/state.param_states.encoder.relpos_bias.rel_embedding.v/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..0bb1dd8d34599f9125ee86c30bb4263c0853144a --- /dev/null +++ b/checkpoint_10000/state.param_states.encoder.relpos_bias.rel_embedding.v/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bcf53c2c18a78b03b2f235cfaf2ea99f787a6235b7a6aae5dab17de069408a1c +size 1488 diff --git a/checkpoint_10000/target.decoder.decoder_norm.scale/.zarray b/checkpoint_10000/target.decoder.decoder_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.decoder_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.decoder_norm.scale/0 b/checkpoint_10000/target.decoder.decoder_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..879c6cefee0c730882cec2c7d05ed82423b8340a --- /dev/null +++ b/checkpoint_10000/target.decoder.decoder_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be08058e33fdd7dca9bdd3318d9a2db0fd9b8d9cec23cfdd076c373053ce6aaa +size 5520 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..307cd09649d693e5cfbe982700a1edb52ca8cbbf --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ace0f8fa49006524687c5dc1ccb5cd8db112da4dc1ccff409b741be1b9b7bc4 +size 4396449 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e77b34022405101823941dfb70408ce9dae55c11 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:825c3abb858e9a61ca2d4cecda8a880c04f4bff6090ca8980bef3ac91aa97a8e +size 4385274 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ 
b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..33e29ff0f17231ad2f19ed27bf9f7b7bd5c7ca51 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:535a2dbbdb6eea17754462fe1b2cc31f33d564bdd05d1ff1e2fbc40d8141a10a +size 4384923 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..9d26dd10d9ed8526295ccdb1dba03d04eefb5c44 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.encoder_decoder_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0553b00382277a3a4d2a706522df74a4830adf04c609ca5d6399fea91982c08b +size 4396946 diff --git a/checkpoint_10000/target.decoder.layers_0.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_0.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..177cf89c603a17275570723a33109401da70f2fd --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f234156048e7281f8d7345d9ee63cc773856dcb7c958820e24bf511a01749030 +size 22682334 diff --git a/checkpoint_10000/target.decoder.layers_0.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_0.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.mlp.wi_1.kernel/0.0 new file mode 100644 index 
0000000000000000000000000000000000000000..f6d4836c1ef1103ea496405fc0cb4f16cba3bb43 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d28637e0673bf4ac3917ed5d2b8a68d5b68cea697098b5df6924aebe755bee +size 22698541 diff --git a/checkpoint_10000/target.decoder.layers_0.mlp.wo.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.decoder.layers_0.mlp.wo.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..86e15ce83a2c6ffc62fb6ddd9533f6c03d5c39a8 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea918e07b5ee03b0a4f1aec2bcbcae68cae0e1f8cfffcc7ca564279a0790463 +size 22701089 diff --git a/checkpoint_10000/target.decoder.layers_0.pre_cross_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_0.pre_cross_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.pre_cross_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_0.pre_cross_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_0.pre_cross_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..28b6e6888b7de3491545d64cdfc0839c3fb445f1 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.pre_cross_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495fd28da1aa8dbfbf19ba4f9715235471a7f194284d09a4d5258af0b1e92948 +size 5740 diff --git a/checkpoint_10000/target.decoder.layers_0.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_0.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_0.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_0.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..16b424f8596fac201d4a889b130dbcab6640847f --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753cae25b429114df8624347c2fa133e1ff9de4c10fc96cbcf6e1227a81f8b0b +size 5609 diff --git a/checkpoint_10000/target.decoder.layers_0.pre_self_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_0.pre_self_attention_layer_norm.scale/.zarray new 
file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.pre_self_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_0.pre_self_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_0.pre_self_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..efec024d27c0c96a6796ff25ff24038e536df418 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.pre_self_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30983270fcd135600aef877e43e28bba1a905c34a4077e3e5113889cae44d1df +size 5674 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.self_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.self_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5255a3e4c471bca94b8b7937e341315059783f58 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139803eee7668c102c9317d7aa01d7748889e7c1e285434f72dfed1d75fb6f5b +size 4396338 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.self_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..4c40038dca732e2400ac1aa4979936979dd8d3a5 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc77673aada1f7914466a462abc8bf9b9e2493aa48a39fbf518c97eb12d51e2e +size 4386619 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.self_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.query.kernel/0.0 
b/checkpoint_10000/target.decoder.layers_0.self_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..71f882a563fe4cfcf4e9ab6e272c39bc1ee79ff3 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b890fa08d3378263a943f9bd5c7292f2c183787fe1f81425c1992ae266ed790b +size 4384002 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_0.self_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_0.self_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_0.self_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..d8baef55a431df5a6693655cee6d55f607f1efc3 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_0.self_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832b8a9fc13cec9ff4f2e3094889f258c546168b0189dc174aa7f548ab5b668d +size 4396922 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..70fc15dd9b7a1355e5946aae0e45f0f00b8be48e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b195e48c0990a0a3b1d60a4792eff6f3df6f953f82f6f8719dd1f15a126915ff +size 4396402 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..61801d6c8230acf9553a20717a6d563616ead0b4 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:eac11f4bf0fd4ff889d2c00c8a8187c544cfdd1996ee1e7bee937bb8bc8c5bbd +size 4385760 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..03dd6ee15a9d281fbd8525adfd85842464473703 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13c40f1aa0de4b04c79cc07ee331ce2067754a356c89764d52468ddf0b58944b +size 4385836 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..f507184b73838b74b21ea0b9babca5e4c4713bbc --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.encoder_decoder_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c8b91f16b892cbc200d17fc39076a31fb3d97ccaed3dfeede6c1be49164d69 +size 4396355 diff --git a/checkpoint_10000/target.decoder.layers_1.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_1.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..cc2a8192c4b8b3d83ab41019b6d5209f13c87a81 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb305a1dae027c74bf4a56365a90a1594339c613e879f0de0a9bdb3c9f2d204 +size 22669691 diff --git a/checkpoint_10000/target.decoder.layers_1.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.mlp.wi_1.kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_1.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..1d57ac12c98ae3a008d69d9d1cc86a0c89f1e657 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e037f6a244fc6c13fa89c83c938901d14da7aed3ec11af05208f1c9b094bdf +size 22680903 diff --git a/checkpoint_10000/target.decoder.layers_1.mlp.wo.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.decoder.layers_1.mlp.wo.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7b77432426afffcbfbe60b3753efc5842858472f --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed82f447874810929e81647a5d62afe21db39f54e629ff25436527ac577c737 +size 22705254 diff --git a/checkpoint_10000/target.decoder.layers_1.pre_cross_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_1.pre_cross_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.pre_cross_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_1.pre_cross_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_1.pre_cross_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..cd0849c5689be08d1ef03d7483faac607d101159 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.pre_cross_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a37eb0d18994915705bb1cb24bdf6bc77c83a00dd9abb5c04e069b9455a7b616 +size 5699 diff --git a/checkpoint_10000/target.decoder.layers_1.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_1.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_1.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_1.pre_mlp_layer_norm.scale/0 new file mode 100644 index 
0000000000000000000000000000000000000000..5c9392781503c801003a57979a302c636f22c9ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ecee2e71bcf409868c100b04cf171c2016e2a317ee7d28aae91aa22324c2c0 +size 5616 diff --git a/checkpoint_10000/target.decoder.layers_1.pre_self_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_1.pre_self_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.pre_self_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_1.pre_self_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_1.pre_self_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..cd13e11c57abf9ec87214cf99a1c8500f5c1582b --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.pre_self_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362c9e5d571c4d3795aab3a4d35e872834c530184a3f6be255926c4dcaf788cd +size 5595 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.self_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.self_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..4dd25c5a81ee9f55bdc9d8fb5163262eaf76456b --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616b74a8b06b2e574aeada265d324c76713697960c797d4492631beeb19677cc +size 4396252 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.self_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..197f94cceb51b148c164096ffdd90a055645fcee --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:329841c6e7f7ac3053c5325bf057e35ceba625c040d9e9fdcc8ebdff7fcc89ec +size 4386068 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.query.kernel/.zarray 
b/checkpoint_10000/target.decoder.layers_1.self_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.self_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..d62bb5ce43d1fc2f97575261c7f31dd849878dfb --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36eef514c174491bc1b0fee2651db7b850fd3f28d528be9936a939bc66c0bc55 +size 4386484 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_1.self_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_1.self_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_1.self_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a2fd6dddc00ff90ca43c77c64f623970c22be4fc --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_1.self_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8dbd79ff9c2f16494cc91afa03c4e030e8b0c29a5938e20e372b71a1ae81967 +size 4396850 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..b3a343808ae51b448510cccc4b27798211342f64 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4311cf10697a39f223774aecb5006a5a19be8b057fcad1521427c153c301bda +size 4395322 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..8d37b02b6000adbc0edda843bd6e3284beb52730 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d85ae2d5a87696044eb1e034ffae55f15fe4870d1398c54cb6f78327a53c6d +size 4385712 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..abae38074fa27bb927228fd1d7ca2bf06c5d0b24 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70492ea1ed1dd4dbe978d415951483a0c79008bfe912e777f300ac64efc7c91 +size 4387424 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..96357ff2886ceb9b17f0c1b3211fe720e86f2697 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.encoder_decoder_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ce0fa22d5fef1e04178ae9b9d1517d06bc61444febda331c4cb4eea9668ba3 +size 4396677 diff --git a/checkpoint_10000/target.decoder.layers_2.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_2.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.mlp.wi_0.kernel/0.0 new file mode 100644 index 
0000000000000000000000000000000000000000..46ec102b5219230466cb3ad2507e5640eca302ab --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7435a826615f8dc7f07d05c8cae12a3eaeab9c06cea3c384a67a91e2feb116ca +size 22666718 diff --git a/checkpoint_10000/target.decoder.layers_2.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_2.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7f025ae3ac3e71b62dfaea813fbf435182e7d1fc --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f15f50a73d8a3a8293383736d5e4a18e7136c180d4febf96d6fa31daf1634c +size 22676434 diff --git a/checkpoint_10000/target.decoder.layers_2.mlp.wo.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.decoder.layers_2.mlp.wo.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c8d00f2b3d2e08f5202570e43a42760af2680152 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5a4b2b05970ab2ee3bc4e7034826d1c683adbedea4a1c7327d0d627c8e4ce3 +size 22703541 diff --git a/checkpoint_10000/target.decoder.layers_2.pre_cross_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_2.pre_cross_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.pre_cross_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_2.pre_cross_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_2.pre_cross_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..561073dbb28fa3e8087beada49579b65096609e5 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.pre_cross_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cccc5c29749374d5841ac964e249c09765b29e7ab9b3c54c770b0db771fb8959 +size 5638 diff --git a/checkpoint_10000/target.decoder.layers_2.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_2.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_2.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_2.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..0169880708a092f2821b52dbc84490ab498b965b --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451182ece12666922a402aaec87e76e05c7f29254b09ef07e3691d4701f1bdba +size 5534 diff --git a/checkpoint_10000/target.decoder.layers_2.pre_self_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_2.pre_self_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.pre_self_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_2.pre_self_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_2.pre_self_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..cb39c2afdc449c1fb44e19eeeb058577bec92944 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.pre_self_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daed22f85d3ea01e77911142f81c8e64cd61e1569c7400b37d86abbe43681f94 +size 5540 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.self_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.self_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..136965bab9c6f75352ce76f1710f8175606a4b8d --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f101b3c4b31371ed79425acda268a668103ecbff6de62aa79237e56883813bd +size 4396451 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.self_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.out.kernel/0.0 
b/checkpoint_10000/target.decoder.layers_2.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..9d02df45bc03f7e61266b9e7aa9499790ba75da1 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300a21cb9be27d88b97fde1dadff8f15861745cb0cc1cd677b94e24fa39a38f3 +size 4384075 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.self_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.self_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..1eb4c9c97e00bd241d5b178c659f172d9eb6d285 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c2382126eb7ab218a8a5679a8a211a440241ee5a735601f91c87e8c412c3a0 +size 4386666 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_2.self_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_2.self_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_2.self_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..3ad136e8abc16662c9c80e8831be0cf0cca272af --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_2.self_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb29b6dea1dcefd2e62111fa9a38498cfdf5dfaa7e108a27cdcfd691ae311e70 +size 4395902 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a35018a3fcb1d8196e16b6bd4dd1c0a48a1d418f --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8d3fe23d3b2fe614a953bafd6241912f4eeb45497313f2a19722c0b6453e53b2 +size 4395140 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..1b663c53a32eb29d99ad8396c1e84dd17c035b12 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b5eb4647be8034c9460b363622ca0d701ad6988112188dc15dc2bfc28f2d4cf +size 4386097 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..46f60a4aaf3d3e3e9b2bb72170cee6d397575aea --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeba346f78bacd24b1099a63315c4ed25baa28373821a56f10ff1d710de48ac8 +size 4387415 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..381fed5969a744587f07251af4753a5b530f6253 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.encoder_decoder_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f45b337ef7389f3e475a66bfd7138ad3e4b904c337d1a3319f208af6954cbda +size 4398368 diff --git a/checkpoint_10000/target.decoder.layers_3.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.mlp.wi_0.kernel/.zarray new file mode 100644 
index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_3.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e4e9b2a48cfd442bf0e2a6b4adbd7fdb0d15c009 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c3e306130f7e884244ed692da9c377be64b3800d15cb390628cc5a38bfbb26 +size 22664656 diff --git a/checkpoint_10000/target.decoder.layers_3.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_3.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c776c18525951bbd13da60e6e3e0113f3057cebb --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a422e5e75d04b88445aa1fa19e417f62116e98b921cb07504a2a357beaf8ea8e +size 22674317 diff --git a/checkpoint_10000/target.decoder.layers_3.mlp.wo.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.decoder.layers_3.mlp.wo.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..434cdb70c8d4a28d3e647f96caed4828c5efd54f --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44620f380203a0719236c97ecf39f140a2f5fd38c0b870ae0f951b1be88f6d0 +size 22703996 diff --git a/checkpoint_10000/target.decoder.layers_3.pre_cross_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_3.pre_cross_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.pre_cross_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_3.pre_cross_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_3.pre_cross_attention_layer_norm.scale/0 new file mode 100644 index 
0000000000000000000000000000000000000000..5a3dd50f1e51ea8257060cd1600568ff68a84ad6 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.pre_cross_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ec6f8caf81f1c301a7ec29dec637704f92f30579581aac58b0d8b20fd6e974 +size 5563 diff --git a/checkpoint_10000/target.decoder.layers_3.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_3.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_3.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_3.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..a7722d2e4c7d32cecdac985a09df16b8f37254ae --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567d7a3ae58ecfc471251fdf01a47fd12c9608f75f26a965ae49e864955a922b +size 5470 diff --git a/checkpoint_10000/target.decoder.layers_3.pre_self_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_3.pre_self_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.pre_self_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_3.pre_self_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_3.pre_self_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..027918614ee9d441908a6ef449db79111f0ba4b1 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.pre_self_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1725bce554d3baea32c7f16f23831230e06a2d9e5d9abb8ec357994c64c058a6 +size 5505 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.self_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.self_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..724d8acf93cf17a121bd8153a50d71edd7527b25 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a5ed0d25a43b87db6c8bfea8f179fbc34cda0aea3e8086ea042d6b66d50402 +size 4396237 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.out.kernel/.zarray 
b/checkpoint_10000/target.decoder.layers_3.self_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..ee3bcff7a1bb44f03774480f87aebdc733aeab77 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe74614fe6262e8a3ef228b87c7500dc68f1322ab398036af2d61d105682842c +size 4384723 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.self_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.self_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..df1b3b2e89bd179f9659be847b31d87c6488ff3e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3dd36ab7230a65091bb4144fbace522ab678f0218a59113e9df4291c207ef7 +size 4387063 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_3.self_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_3.self_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_3.self_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..26ada86a0d83644f09cec517d969a6118441de45 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_3.self_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bfa35bdf0cb52c760a25c65abedfdc3b359922b46b8c5e8182504de34aa084 +size 4395208 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 
+size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..fb9187c11436abc98372d2b5663081f0b91b9992 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0271e87a1ecb39595664e58e482045ae75d9b60103230977ae7d59767730717e +size 4394863 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..925063b18697bbe6a66202e808caa636f31fea54 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7f21e1c8cef46e2bccea728398b080b22e813270aeffbc1346160529d071a5 +size 4385673 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..363615b511d6a4e8f0193d5479c3bf16014f6210 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f8203def70faf2591a19952ba4ba2c2b0b30744a6ca3f9e52ab45ec8cd1f99 +size 4387598 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.value.kernel/0.0 new file mode 100644 index 
0000000000000000000000000000000000000000..50027c23986e082683e65904ac2d647859eeaa8f --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.encoder_decoder_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1c23312a9384300465a380ac45f8648c744d633cdf3a34ad7f2da6252bcfe10 +size 4393640 diff --git a/checkpoint_10000/target.decoder.layers_4.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_4.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..ed75d14a513f34cef124759318674e4b945b2e30 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d47c15c1758f36166383eb92d730e464c4cd9deb6cca0cc56cd0bd2b8a59dd +size 22662101 diff --git a/checkpoint_10000/target.decoder.layers_4.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_4.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..93b8d8a9a63eb080b18c6b3e459dafb4ecba50bd --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e764bc940c898db47a9f39d4ef1d1ee0a8a6aa24344a8a34ae632c2ce9366c2c +size 22672220 diff --git a/checkpoint_10000/target.decoder.layers_4.mlp.wo.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.decoder.layers_4.mlp.wo.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..01e09f3223bf15c60e0284b60a2e2e3f09217f51 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4912c83a0ccbfe29974a92fbe03a5c3dd8fb3a91fd1d5ba1ef099374e3232c +size 22706290 diff --git a/checkpoint_10000/target.decoder.layers_4.pre_cross_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_4.pre_cross_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ 
b/checkpoint_10000/target.decoder.layers_4.pre_cross_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_4.pre_cross_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_4.pre_cross_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..c519c40330ba23a0091c22f6bebd9ab4b03f27ab --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.pre_cross_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46132feed92099d9dc0ff5b63e1b837eb178d0c6f4b6769c65bf8c0c9f9e173 +size 5568 diff --git a/checkpoint_10000/target.decoder.layers_4.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_4.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_4.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_4.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..ab6d198de6246f063e9b2ec0776857c565415c8a --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a9a7cde26f1fef372bf3a8d6c44a72709ce23c112ef347f682af72bdc2c32f +size 5432 diff --git a/checkpoint_10000/target.decoder.layers_4.pre_self_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_4.pre_self_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.pre_self_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_4.pre_self_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_4.pre_self_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..5d96f8c63ed1e3df342805cfdac9d7a80c713436 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.pre_self_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b195d271079c15effa47ffc7a93909073d889ddf96bd0fcdf9dc295ab34f01c +size 5486 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.self_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.self_attention.key.kernel/0.0 new file mode 100644 
index 0000000000000000000000000000000000000000..8a26c0fbe23631f578e5e19f5bc79c3dc3927f81 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1178ea3dd6488c26901e589f52cd2a008620a5a28dd52e59b279f52a155eaba +size 4396471 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.self_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5466ce42b4fae481dbfc0766be198a97b153b440 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cacf4296b819eebeb4c75eac30ea3cd3aa88665ed80f5efbb5139cbe850615 +size 4385878 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.self_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.self_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a3b1ba4418d3e04fa50a7b7566fda5cb19b1fda9 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a1e15409075087e7b4346fb887e848fa85b62d95c0f4bb8440076eab66693f +size 4387235 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_4.self_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_4.self_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_4.self_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..37de8c53c05e284f8e769ff72edf8eabe5f24558 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_4.self_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66383dd6be1b6eba19e5eb3c86e9d5abe2159d6c3f22d5e31fd7b17fd99f5e39 +size 4395399 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.key.kernel/.zarray 
b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..0a73e6b27852f7ee3fb36bbaffd3ee1a8f793468 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a12dd063eb5ac35abf137209b4f68f1ed4d784ceb7355be1d34a450b6c218f6 +size 4394240 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e905e923b2d7aa1d0a77a1f70288c38d0f7a2fde --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a313b2a778933401d623532b4c79e97deaeb538f42143f0cf27c60de9c53cf1 +size 4386963 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..3aafd6ebccf7cf7d04cd418e8136b857cf87a26b --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5465d6731ce4cdd2411ae195fb9ef7993d446248a1b078cc8973d9ed0c20a02e +size 4387783 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ 
b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c2fcdfce0f48d8a713ed5c6187b614b5b330f697 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.encoder_decoder_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb8e4aa6afe82527a3b6087598e8fa21f925ad759497b80f9b68fa54c1557bf +size 4394926 diff --git a/checkpoint_10000/target.decoder.layers_5.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_5.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..946eb01ea546ed351deac046a3986b71530965f0 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f2b6d1c6548c3e6617c1df59342f40b199bc7553aa11483198b635ce4d5687 +size 22664333 diff --git a/checkpoint_10000/target.decoder.layers_5.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.decoder.layers_5.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a62187aca5acea5c7872b79c3d3d3b039cfdd389 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c80f308e54241e95be40dc38cbc31ce85482f446cdc0528fd087f315f597b71 +size 22672508 diff --git a/checkpoint_10000/target.decoder.layers_5.mlp.wo.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.decoder.layers_5.mlp.wo.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..735f5916fc18a0b3bca125dd68f75b87ca6ea4e8 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7f0c49317290089c02cd6450c7375d3e5089cca7d92f17a948f42ce6745311da +size 22706751 diff --git a/checkpoint_10000/target.decoder.layers_5.pre_cross_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_5.pre_cross_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.pre_cross_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_5.pre_cross_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_5.pre_cross_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..a5872f099de8cd9e38296a3fb5f37be998d53532 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.pre_cross_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8881bb93812fc9f13c153b04e0455e3bc2c06a5ed7a726445cf907181996a4a +size 5589 diff --git a/checkpoint_10000/target.decoder.layers_5.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_5.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_5.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_5.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..d007b686d245b2c6a0496c4b3abfc8e3c03441d0 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be9074224780897db8524828f61cdea9e6fa0a9022b7de4ff4a87f662df29393 +size 5401 diff --git a/checkpoint_10000/target.decoder.layers_5.pre_self_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.decoder.layers_5.pre_self_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.pre_self_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.decoder.layers_5.pre_self_attention_layer_norm.scale/0 b/checkpoint_10000/target.decoder.layers_5.pre_self_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..8d0672052a269783c609e977e9190e9da71bf001 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.pre_self_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0a0209f432e5d7df6452223c1f44a0cf637b226bffbec1567c99f173185dfe +size 5535 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.key.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.self_attention.key.kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.key.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.self_attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..046c6a374703d2834a508a7d92d17c6b451e3718 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f585046b3649b34600805bba45579ef8da4e9be4386deaa87b92ffc456a24580 +size 4396915 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.out.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.self_attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.out.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.self_attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..48051405dcefdffd11cc3742ed7ade946a98e8a2 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a91064aaa2f05290e7c6816d9c86920a749362c51e8afb392f0825355061a9e +size 4386657 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.query.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.self_attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.query.kernel/0.0 b/checkpoint_10000/target.decoder.layers_5.self_attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..54909430d9b71dd6bd6de65abca01d409870f556 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d9b3e3d8728ef84fa43cc7679f2aec183aa558b560a3d27996ac654dc4098d +size 4388351 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.value.kernel/.zarray b/checkpoint_10000/target.decoder.layers_5.self_attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.decoder.layers_5.self_attention.value.kernel/0.0 
b/checkpoint_10000/target.decoder.layers_5.self_attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..869e6c16d2c3f4db58e80acc5178802c8eba4256 --- /dev/null +++ b/checkpoint_10000/target.decoder.layers_5.self_attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2669d5ae50b1013b7200cebde6d70ee408c322220cdf4c9adb7b6d9f0d8fea52 +size 4396633 diff --git a/checkpoint_10000/target.decoder.logits_dense.kernel/.zarray b/checkpoint_10000/target.decoder.logits_dense.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..bf7f497741fc1ebb8fe318ed563c4173cb8b6aa8 --- /dev/null +++ b/checkpoint_10000/target.decoder.logits_dense.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8316f3c323987cd8fc2778c703441506e06eb23512fe8e96b28913277ab60a96 +size 178 diff --git a/checkpoint_10000/target.decoder.logits_dense.kernel/0.0 b/checkpoint_10000/target.decoder.logits_dense.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..74785ce89ea098c161dda51d985fc5902acf6453 --- /dev/null +++ b/checkpoint_10000/target.decoder.logits_dense.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3572f96f586fb74e8a77635863a162065b9b6607f77cb12d2ddc9494fc2973 +size 2188761 diff --git a/checkpoint_10000/target.decoder.relpos_bias.rel_embedding/.zarray b/checkpoint_10000/target.decoder.relpos_bias.rel_embedding/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f0ebb438b0df77a995f437c9cf19936cd0a38f79 --- /dev/null +++ b/checkpoint_10000/target.decoder.relpos_bias.rel_embedding/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7028b3f2007ef72746d64487314ca6723cab7f9713ecb30bd30b73a22418e831 +size 172 diff --git a/checkpoint_10000/target.decoder.relpos_bias.rel_embedding/0.0 b/checkpoint_10000/target.decoder.relpos_bias.rel_embedding/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..b8c7de67c0e755a341fe34f18bd8ce7a9542e2a5 --- /dev/null +++ b/checkpoint_10000/target.decoder.relpos_bias.rel_embedding/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dac0b9f54464d5f8d180641900fe523a3385147ba487db501a2a54d292b7dc3 +size 1473 diff --git a/checkpoint_10000/target.encoder.encoder_norm.scale/.zarray b/checkpoint_10000/target.encoder.encoder_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.encoder_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.encoder_norm.scale/0 b/checkpoint_10000/target.encoder.encoder_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..cbd0bc83e45df17999d8b5077e8978d154f7dffd --- /dev/null +++ b/checkpoint_10000/target.encoder.encoder_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7adaaf8becc61bbba2b3b62446a98aca1993734526be05962f48d8b6e11476 +size 5533 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_0.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ 
b/checkpoint_10000/target.encoder.layers_0.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_0.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..15f0d2f96f60a1b1bedeebd483e899ced404649a --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48db2a37bdca697b48881abc5b1a43f056760cf65bd99ac8f310115c2004b6ac +size 4396212 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_0.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_0.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..dcdb519e65a08183916610cdebf9c934e1ae9d95 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95d4ddcbcb5cb9533b960c758f59eb7b53c0ae88300895eece369988b2759ee +size 4385824 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_0.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_0.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..6203328dce8ee69f1d485a47eb6492e4bc5cca25 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:271a2c3434dc0988a90e66c863cd610337d2db4d2ab89b87d30acacc728bda66 +size 4382886 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_0.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_0.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_0.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c66e74781ed9e2e324b79077d544ff2ebe5032a6 --- /dev/null +++ 
b/checkpoint_10000/target.encoder.layers_0.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a2aad7e96883c589b897a572085b9472de29c2e075b479ef3c3fcef8bf54f18 +size 4397436 diff --git a/checkpoint_10000/target.encoder.layers_0.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_0.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_0.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_0.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..227b2844294c3fe619d8697da2fd1f97a1b99302 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eac8afe92f38f374941458e2d1bd8c2297376451d5d4b448cb200cb15c329a7 +size 22692284 diff --git a/checkpoint_10000/target.encoder.layers_0.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_0.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_0.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_0.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..00d8660829f10e84dcf3744f073cb44e8101cc40 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ce73d34a25102d67944527efec92990509fda860c3990e4280289128e55fe0 +size 22702885 diff --git a/checkpoint_10000/target.encoder.layers_0.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_0.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_0.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_0.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..09b5106bd729076217763f3d137c04d9980ed81b --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97c31b664ea7368a06976b35598958b481862b2591cdf57dc58c8c33d3ae493 +size 22696740 diff --git a/checkpoint_10000/target.encoder.layers_0.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_0.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_0.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_0.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..483326290ca4b505bde4d8d6c12d4c8da3d840dc --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39363f07c4bb5fc7ef5b72777cad2011df8f4873063f25473d277477f9e03f39 +size 5742 diff --git a/checkpoint_10000/target.encoder.layers_0.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_0.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_0.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_0.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..d3582d5230837089e5105d7701badb96303cb7d5 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_0.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fdf850094fa9328a512ff0fbc720595e88e6020f547452a0e7720f27ba7f8fa +size 5533 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_1.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_1.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..4e2e96f030f832d974b03de402bd16e2e96652b9 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5653b2897a41c0eed1cfe06103105771eed07f7b64a8af8e3a6c1d1875ba3a36 +size 4396162 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_1.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_1.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..6f8778460c03ebe2c3c94b01895b9dcb332e5a7e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:de3975db32c9d9dd73430bf84d9d1448fd9269dc26f1fe795295d80f39b98f74 +size 4387629 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_1.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_1.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..818ffe4c8cbd0640b39e72bea543856e3b6bb296 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4cfc74c1c7cc5616b00571ffefc1b06074c5532c84072053a4ea1c73add2ca +size 4385106 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_1.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_1.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_1.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..9b41546abeae0b901aadf158e85c0004e1bf8aa3 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb6c55564ee993f7af391b832e471d199f537185d493d2e9dad69d2a3b6b89c +size 4395992 diff --git a/checkpoint_10000/target.encoder.layers_1.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_1.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_1.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_1.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5107718bf78ad7a3d94d743bb7ee6e02c6df238e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ae60d0a8989638b61935d4dbcfc2a2f3cb795ca43a2b1acfe009da174e6987 +size 22698338 diff --git a/checkpoint_10000/target.encoder.layers_1.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_1.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 
180 diff --git a/checkpoint_10000/target.encoder.layers_1.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_1.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..f65386697d8a49a3f34b590f0b1162cd5bb82525 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46e9b3527d3dfeb1c494a798cf6a08314f1ab8cc386bd5775ed303a9c7dafa0a +size 22707513 diff --git a/checkpoint_10000/target.encoder.layers_1.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_1.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_1.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_1.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e7f47f6e2ec0794e55c8e8816cc476e04f2e1dbb --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311dea0d75c3d96b637187ad141ae087513280c896ba8f0e28c4b18e7a983268 +size 22669438 diff --git a/checkpoint_10000/target.encoder.layers_1.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_1.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_1.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_1.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..f11d3f287c5bfb1f0d28818f7440c20e51463674 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0e7e145a1015227a78dbc2ca708636f0794093779ef89a19319e4439ebe646 +size 5731 diff --git a/checkpoint_10000/target.encoder.layers_1.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_1.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_1.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_1.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..d8b7a618c9f4fd014f94bfa6b5986f50f35fa570 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_1.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de143b41f02a4545e23ee944737792a21cf9cce791ade2d8c0eb54f7f872df34 +size 5723 diff --git 
a/checkpoint_10000/target.encoder.layers_10.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_10.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_10.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_10.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..08a117c9986df9372ab6894e6778769231d3011f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c1b1316044c11fc07df3a55a1c29899b36cc12c4d49faadde9a94659a03c047 +size 4396169 diff --git a/checkpoint_10000/target.encoder.layers_10.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_10.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_10.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_10.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e8ae91249693b4525bb30031c4c5ce4eb2f27264 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384915fbd6a6f507c967e7e5a860666cf5e868a98b5fe114ff93f66be1668fd6 +size 4386060 diff --git a/checkpoint_10000/target.encoder.layers_10.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_10.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_10.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_10.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..01e20b6407c098ecac67e07b2de5dc534ab45b91 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ae687ed60f6c28073c96fbc4707595b3226ba4dd133d1eda46c0894ea3f9b9 +size 4384337 diff --git a/checkpoint_10000/target.encoder.layers_10.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_10.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git 
a/checkpoint_10000/target.encoder.layers_10.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_10.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a4a1dddd5528900ef8d3506a44c8c28abb1b7f7e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3db8a1273fddb5eb3ace0ca915d745a474631c6fa7eb106ef93d94afb1f0c69 +size 4396556 diff --git a/checkpoint_10000/target.encoder.layers_10.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_10.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_10.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_10.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..cf1327b8b7a2172ce10ee31e495659e92096c824 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c59dc85d937b8122c26e8073f97670c452ce08cc2aaf23da49acfd90c92f09a7 +size 22690357 diff --git a/checkpoint_10000/target.encoder.layers_10.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_10.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_10.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_10.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..2afd8d0443491573ea70552faecce2994663da0b --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6663fc6132272c3bd3f0883bca89d7c85e988242f3855948c06d7fae9cded4 +size 22699013 diff --git a/checkpoint_10000/target.encoder.layers_10.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_10.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_10.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_10.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..650f976f19161bf3e70ca654effa79a6be2354cd --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:633bae7c1e205582cddd3adba69aaeb0f3b976f13ac3f34763e86387338b43ea +size 22696571 diff --git a/checkpoint_10000/target.encoder.layers_10.pre_attention_layer_norm.scale/.zarray 
b/checkpoint_10000/target.encoder.layers_10.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_10.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_10.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..7ee59740f8c7617014eb820081868b04439089e7 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0301f740fd4e2474a929944457fe06a2c09088a011597f3dd2077dfcc40e664 +size 5692 diff --git a/checkpoint_10000/target.encoder.layers_10.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_10.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_10.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_10.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..4bf2e16972813ee19b60898f0508b7aa96046827 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_10.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8321204d02a879e9fa52c3c804611ac69f3a89f5cd6652b363fbc6d28ef72f3a +size 5622 diff --git a/checkpoint_10000/target.encoder.layers_11.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_11.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_11.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_11.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5891c3167d9a748efe9156d07b6d0cd5fb6eb8dd --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9a90bc1344a396050f300a1cd4b85ce7b28b239ba60dfe1721949abc1c696b +size 4396596 diff --git a/checkpoint_10000/target.encoder.layers_11.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_11.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git 
a/checkpoint_10000/target.encoder.layers_11.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_11.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e62e4a987c570b93695e602b6fca95e4cab6d9dc --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7510ec5823f22ff535cedadeadc7738d1b3707abdebf4e546584e7bce3a3e4a +size 4387667 diff --git a/checkpoint_10000/target.encoder.layers_11.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_11.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_11.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_11.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c4759dc3d615304b165905695e0dff803e098f72 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ac41698ae8e591e2769cfada8747dbd73ff6cc77064d8c9e7cf1fce5c38748 +size 4384611 diff --git a/checkpoint_10000/target.encoder.layers_11.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_11.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_11.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_11.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5654832924fd1e04c2b4c31eb72552a75d95db13 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaee79a696f6a48378a789963d2ee53c5efccdb0485de006f036473d029d2b84 +size 4397160 diff --git a/checkpoint_10000/target.encoder.layers_11.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_11.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_11.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_11.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..62dcf89ae8ba59825660815cc23e103d3ef26868 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cce1400915a52a3e60b90c764da919b2782973ea317c2341b8188d9856f20b3 +size 22687988 diff --git 
a/checkpoint_10000/target.encoder.layers_11.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_11.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_11.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_11.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..78a72b75864f93113f64bd6929139916d5711116 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e41d95412fad27ab98cd0cf9cdd3e575f28843380e13cf6ade03435612954f23 +size 22698482 diff --git a/checkpoint_10000/target.encoder.layers_11.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_11.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_11.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_11.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..3b60a1884b4aee1043d7511ea3e198968167b56c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2924e2a51d50a99e6fe63fd11b612b69764cf719b800e966ba507e519f96a2 +size 22696870 diff --git a/checkpoint_10000/target.encoder.layers_11.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_11.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_11.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_11.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..87a824115555b8d3c960e77c392b63bbe68b379f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31ba8607c992707a9a29860364f1e085bd6212e7489803ab756b5ebac760a3f4 +size 5701 diff --git a/checkpoint_10000/target.encoder.layers_11.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_11.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git 
a/checkpoint_10000/target.encoder.layers_11.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_11.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..f756fbd931fc2d1dc27ec65c8e04513206e66f6f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_11.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7949309ad1759535cd9f7382af83f73b618a097ce34630a6a2a5805a49f544c2 +size 5625 diff --git a/checkpoint_10000/target.encoder.layers_12.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_12.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_12.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_12.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7383dc99dba34597287d34ef2cc556bfcbb3ab33 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8536fbcdc5d50a07c0b7cc93c7bc753152d875cf3090320421c489382d89839c +size 4396232 diff --git a/checkpoint_10000/target.encoder.layers_12.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_12.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_12.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_12.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a2e12df40d70a41f99753291e87b83f724edbbe5 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634a992a6f76f856f016003fb59e2c92cfdfab42c1dfb24b35940ca280134e29 +size 4387122 diff --git a/checkpoint_10000/target.encoder.layers_12.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_12.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_12.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_12.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e85ea5b4e7a87d5f9bf400b680c73060daae2dfb --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a727917d1d219abc780f193cf148d65ffb208fd1a5035708d33df99876fbc7 +size 4384290 diff --git 
a/checkpoint_10000/target.encoder.layers_12.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_12.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_12.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_12.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..37322891c29b7cfa78c24fcf98f9e81b7b9edc9e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b79d2171d7e31ce665161ad061c2b2a35554f79537c223a0ec35c4e1f3e590c +size 4396787 diff --git a/checkpoint_10000/target.encoder.layers_12.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_12.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_12.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_12.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..95f97248d725f23ab9dd115365618687d11578b8 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80a616141dc013a07aa191d3e0107f685746c7239e9c12870336c8a2fe0125d +size 22689788 diff --git a/checkpoint_10000/target.encoder.layers_12.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_12.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_12.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_12.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..9f12f8386270ea7838937cdb25b7cc49fe193cc8 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be23b2f7a78f639f1d8ea7eb679238267e7448e5cacc4426991fd7e96954cbe4 +size 22698724 diff --git a/checkpoint_10000/target.encoder.layers_12.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_12.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_12.mlp.wo.kernel/0.0 
b/checkpoint_10000/target.encoder.layers_12.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..09bef9ec0d845a9a8437ac0bd5c330c54bde1340 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e543ec2135e23be2cb0372e9b0ccb8258cb755c2b91b62961298f24b921c1a0 +size 22695337 diff --git a/checkpoint_10000/target.encoder.layers_12.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_12.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_12.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_12.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..160e26de2e664c4d74c4771aeab6ea302a51b322 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fac30da6a55f1f45247443c3655524e992a44e0eed18bb0003c4dd41218b74e +size 5709 diff --git a/checkpoint_10000/target.encoder.layers_12.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_12.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_12.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_12.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..c5f782c099e04331cd293c4f1fcaabf84ca7e08f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_12.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d041e4a84cd99e9aad27160a25a5f2dd3efc917a691ffe8e15ee334d94fea6 +size 5613 diff --git a/checkpoint_10000/target.encoder.layers_13.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_13.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_13.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_13.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c42cf4514b479bebc5e1e1aa361d9f731a73a32d --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfab574dc9cae4aa299c18ea283b6f37ffbdc8caa89503f35988ca66cb32af5b +size 4396357 diff --git 
a/checkpoint_10000/target.encoder.layers_13.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_13.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_13.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_13.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..bf8797b478ad1bbe9da58c1a42d03a29f10adeaa --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff7d498c34112a23575463e03fd96ab82c70377c16c4ce235748ca70265c0f4 +size 4388110 diff --git a/checkpoint_10000/target.encoder.layers_13.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_13.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_13.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_13.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..70160ececccac1b1fd7e3ce4da55ad977a46883f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a8329810a1a3fb3c635395e9eee8770841a8148f7dbb149f6dc2daadc75772 +size 4384285 diff --git a/checkpoint_10000/target.encoder.layers_13.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_13.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_13.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_13.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7ff383b0ced424b2979aa15dc56b31b1ab801130 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cead68c4958c2d20751047661489260676002df546e459bd445ea2603eb5a4f +size 4396748 diff --git a/checkpoint_10000/target.encoder.layers_13.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_13.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git 
a/checkpoint_10000/target.encoder.layers_13.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_13.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a8deafb4edca5fca5f4bb122b16e28f2403431b0 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e5bc025a9fe6a69007499d86fcd257f051f22ddc3dddf862afb1572019bc5c +size 22688698 diff --git a/checkpoint_10000/target.encoder.layers_13.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_13.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_13.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_13.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..21b6d7f529733a7bea902e0f8757dbc9d7ce381f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6670d64b4ca3cd9c4cbf76df07df0ef52781753520bb733acdda291484bc5065 +size 22698905 diff --git a/checkpoint_10000/target.encoder.layers_13.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_13.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_13.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_13.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..8885b1804102bd48193e7be8c57d15c8883db3e8 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af2baa55b013108f30d409192ac23555d0adec2cd9159e58e96893f9502c0a56 +size 22696001 diff --git a/checkpoint_10000/target.encoder.layers_13.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_13.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_13.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_13.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..7d43ccc3e8d116defd4d002d36b155235e981a4c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa280970a633e5d7c695e0f075ff11603f485239de6b014002d7be6b0a236bd +size 5677 diff --git a/checkpoint_10000/target.encoder.layers_13.pre_mlp_layer_norm.scale/.zarray 
b/checkpoint_10000/target.encoder.layers_13.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_13.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_13.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..e6d30ba2804d7d4c952c526ad1167bd000ea2a73 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_13.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f431528b1a8bfba05df79014b43e92451bb8bcd0fcb3b54b935a7172ea2549 +size 5629 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_14.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_14.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..93fd4a1c279acaa8e1f46c79ba2d071586cd6a4f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d7878bf1db8d5275a5e058982842fa6422c07d656f94650460ae41bb7f4ec1 +size 4396442 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_14.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_14.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..8e61e9df5362a353e32d6b5332789fa7a7b21bfe --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e622c49cd98a6972249c8f798efabe4b6a50823e0be279f31e9f5da12373f5b3 +size 4386825 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_14.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.query.kernel/0.0 
b/checkpoint_10000/target.encoder.layers_14.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..ed1e68709a4de2747e70bc01295566a53a52f30a --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69bf17e87b8d8f7d54a8a15c15934622ed95825c3cd047b6e3979ab2e7382b82 +size 4384700 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_14.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_14.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_14.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..2af8efb16c5dd5f4b9db5d107c5d767f0c437e56 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3c8558aab4d1ccddb4a75ea788b8b77ff960691204272e08b760402e1aa893d +size 4396204 diff --git a/checkpoint_10000/target.encoder.layers_14.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_14.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_14.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_14.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..d22399ce4e0d8d82850405b60caf3272c44dac86 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47e05cebde2a7419ef2db2fd208419149004611c8faa619fed3bd90f6ebff08 +size 22689667 diff --git a/checkpoint_10000/target.encoder.layers_14.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_14.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_14.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_14.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..3532aa9a9e01237590d90b14f32deb11c2d0af72 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c972f9c40ae36cc3b9464ce04f0c0cda2d511e6f2b6b994d955d8cf0347420b +size 22697114 diff --git a/checkpoint_10000/target.encoder.layers_14.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_14.mlp.wo.kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_14.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_14.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c7839402c14fb36645c85aaba2cb3f59a718a286 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f22a40ee2db83718eaabea06d18142af72ebc983843ac67a0955ba7b6968bd4 +size 22695918 diff --git a/checkpoint_10000/target.encoder.layers_14.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_14.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_14.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_14.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..53d4283ce13bf9fe13a023331a6d2586af44f904 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f77511bcae8af54aa509b63e27722ce6680ef96c01c8ff0669696b6defaa74 +size 5691 diff --git a/checkpoint_10000/target.encoder.layers_14.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_14.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_14.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_14.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..00ee3af9aaff51d7c2f036fb381a368be9bdf06f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_14.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206620fec0bab9e8e2a8532f3242c563378541eacfd921bb80d536131a7678ab +size 5618 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_15.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_15.attention.key.kernel/0.0 new file mode 100644 index 
0000000000000000000000000000000000000000..bc2fca5775ecc64da10b85913b175935eacd26f8 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2adcf5393fd8600589b3bb3f1bdf84dc0933ba50c167faa261becedd13ca639 +size 4396216 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_15.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_15.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a5329ec052f11e918ba2ecb3724018ed1f05d32c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe557cc28293c012fa43f919d2474d86b6b5af9e54ce46864d183cd75fff294 +size 4385988 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_15.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_15.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..30680132764dbe9d9e85b7acaa74313d3da65f69 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ecca5eaf78f5d66ebdf22fb78161d15dc5279cd4c374711ad6068336fd39d8e +size 4384589 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_15.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_15.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_15.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5114bf723983fe632d70dc447ce254eeabd1a43d --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc4251313969ece05b8c1cd96c8ed1a0ee9fe9146987c2e82e823fe6bbfde6b +size 4396921 diff --git a/checkpoint_10000/target.encoder.layers_15.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_15.mlp.wi_0.kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_15.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_15.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e73e39faa2ac7276509c834f6b9724e79687c84f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2ab982c94857808760fb4c6514c2e747229f53314089f99ecb9945982d5cbba +size 22687017 diff --git a/checkpoint_10000/target.encoder.layers_15.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_15.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_15.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_15.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5b3b0065e550c32d132f6f25069e69e05ddceba3 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dada981b72c9e8a094279f4a728333bf9a84b35151292361876e1a97e83d5cd +size 22699087 diff --git a/checkpoint_10000/target.encoder.layers_15.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_15.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_15.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_15.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..52f45e34c31374d09a78eb273b88b45b07d206a2 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88b7b8a4a87d82c7fbdc818b5fd9052d7a222c0506e255dccad7b67ff23640e0 +size 22695271 diff --git a/checkpoint_10000/target.encoder.layers_15.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_15.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_15.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_15.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..330a2c7c0fc117d858d138b91bc639bed5dd6a63 --- 
/dev/null +++ b/checkpoint_10000/target.encoder.layers_15.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb0a6d57f4f6f9e873ee8ff128bb2b422eaf27e9890b5881ba3e698eb22d9ef2 +size 5678 diff --git a/checkpoint_10000/target.encoder.layers_15.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_15.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_15.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_15.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..2010f092364423228942176d2a5c90b062fd9d95 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_15.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3714ca82006f976b1149fb6dfbfae36c0327767615402a4267381c25d7341f +size 5583 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_16.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_16.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..f81759064687d613dd2eb84b271182cddcaa90c6 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a565a91a21a2254ba38ba8dbbd76798dba95932c749bf7612ace7502c23fca +size 4396272 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_16.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_16.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5e04464772176c38862e5e33e48ce85526d238dd --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d8f7f47b30f13d47a194d6ba381080f2f9938e2e95dc08e62030fbec748a734 +size 4386861 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_16.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ 
b/checkpoint_10000/target.encoder.layers_16.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_16.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..3c7dbbd42c6301437fe25821b1e5fc301779b377 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a239654da54e3a1e1ffa8adc64882f72d84cc2e179e41e2518331e7360aa65a0 +size 4384736 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_16.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_16.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_16.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5cc06a2ee9ce6a9cf66ea6ddc14a6d29a0e4e6da --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30279108e4e22adc6c04f1c457ded2bf977669ea4339da69dde90611bd96236 +size 4397548 diff --git a/checkpoint_10000/target.encoder.layers_16.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_16.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_16.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_16.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..44e032bfd99c4bc7b1788b69a3e4aa4c3c501a79 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f1e515081968fc2cafbf13217689387fbb82dade874b4d9d8a4908a26c3cfb +size 22687419 diff --git a/checkpoint_10000/target.encoder.layers_16.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_16.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_16.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_16.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..779f4affc8ab5bd0d2a49ad9dc6131386dbc4f96 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:76f464f085ac0d5f48491980ec5288aa5c91b3534e5f480e1b3e098155ed079f +size 22700526 diff --git a/checkpoint_10000/target.encoder.layers_16.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_16.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_16.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_16.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..744e9cef6e3579dacea05076abc399ea718578d4 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fff497c8a068c02d61d9a7c5941c4aa2c80f16d3c9f0fd401ca5cbfd2fd758d +size 22693665 diff --git a/checkpoint_10000/target.encoder.layers_16.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_16.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_16.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_16.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..8f62abe49fa6e5c8bc4c546a95a3e9a7d159818c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af01a7e0d906509c53d0b588310781013a83debf66a6d0dd39cd1072828b9237 +size 5678 diff --git a/checkpoint_10000/target.encoder.layers_16.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_16.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_16.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_16.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..7b42f476dbad3710e277a978e644433f49470e75 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_16.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d815b4dbcb5f7da9dfb8037b0dd133c3b882e0cdd5264e8ca6a06970238d90e +size 5573 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_17.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_17.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..0aa2c1c0bab6daafbf2c9f9012dc3ea0ea6f7701 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:012124c57ce6065f6882affe8ca40a0c0c4805b859d03c3cab6854f96e19eb15 +size 4396317 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_17.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_17.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..d8118305eb809948713135cb972eb420f402e230 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0afc1eb4781fa92dbf6b53f78727ebf734eff8f88ad04d7f71a6128c1b25ae86 +size 4385339 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_17.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_17.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..8c5dbf0fa65d632d1d12b257fcf0308153a02ee3 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315330f58b38ffaba242ef09c871ff1c4426ea9b56d94ead1b63b8a64eec9f04 +size 4384834 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_17.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_17.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_17.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..91e432d32a079dae54f74cb2a1e81d288c51b666 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:1d6116acb48ee7e5371abfb88fd07f3720ac84f9426f42775cae4ce80ec8948a +size 4396616 diff --git a/checkpoint_10000/target.encoder.layers_17.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_17.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_17.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_17.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..eb228802d3909f09fc865d28607152e34e40d635 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f31317763231bbda04f3abf172b11067aa776719d9358475d3be5dcb16e348 +size 22687271 diff --git a/checkpoint_10000/target.encoder.layers_17.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_17.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_17.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_17.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5900e8b93c5519585b6bddf61faa6df5f0d2e3f9 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c634e6d33867e40651878726a08fb50b6715113dae5ddbc012dd6bfdda232252 +size 22702032 diff --git a/checkpoint_10000/target.encoder.layers_17.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_17.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_17.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_17.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7dd9901db00daae3bb805ce47037184ab534d39f --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3eda117308c5d5341dc04c26b6beaf1df49f3b65883aaf9301de164404097b +size 22691461 diff --git a/checkpoint_10000/target.encoder.layers_17.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_17.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_17.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_17.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..2888ac9665dd3a639053788cd0d9ef4fe0e6be76 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d22bb9d6eb959269ab6cdc87162217b5fc4d638d674cda3a6fd52c9b32bd6704 +size 5706 diff --git a/checkpoint_10000/target.encoder.layers_17.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_17.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_17.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_17.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..5581962054614ef5e0fddda5286a0b6ff70b3d86 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_17.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7984b24ee671d59124183761c3960acab49c0f8cd3025914e81d32b0022e2f +size 5562 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_2.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_2.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..9255004f38cbcbd51086f6d9e08355398bf200f1 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c78a33cebcc7c63d9263b90984db98f546886e856eb3e7b382993ac88accbc05 +size 4396140 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_2.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_2.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..095b5d4ce05ff0f1fe625f3edd91a8b4623a61b1 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:abe8be645c37608b758e8f8093975793b3fd9fd641e291c0ba1f92a3da929c27 +size 4387328 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_2.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_2.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..43af1346d1af1aef1d98fca1fca7edafee11fbbc --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc85bbd5c7548ab4f6bb633ece137eceacf56f508f201f0fe6ca6c332c2fe8a0 +size 4385480 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_2.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_2.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_2.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..3afc4828be226556f1bc8fffb9b3eb1757a2bf65 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75dfd67492ecd702122e43ef44352f13e24ac70ddb294db505f460453bf68fdd +size 4395978 diff --git a/checkpoint_10000/target.encoder.layers_2.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_2.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_2.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_2.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..d4444dc70c81aae7299dba2c747758813c85796a --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0685e7b6886f12b2d378f01d88ef0e54370a9cfade161faede743ae714958137 +size 22694593 diff --git a/checkpoint_10000/target.encoder.layers_2.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_2.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 
180 diff --git a/checkpoint_10000/target.encoder.layers_2.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_2.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..1f7700ec5638b4992839f507f09ca087a55e4952 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bff97643969501a8a8706736b51dc9d9bec00d6a520920c5b221be9cfa905c4 +size 22707156 diff --git a/checkpoint_10000/target.encoder.layers_2.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_2.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_2.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_2.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..881f01246e9d91375c28d9da8067b89d31025c80 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c4f2e76cb9b40bdf6826dcbef6a09e2b582ea927f991b5c540a48421038ed3 +size 22678390 diff --git a/checkpoint_10000/target.encoder.layers_2.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_2.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_2.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_2.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..05485499498f5eb3f905cf94f8fee9d5d3404afe --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f56759b37b94c4438836b49b5b97c731ad10e3bcfdb14a35a4ac41ca8e95aa5b +size 5727 diff --git a/checkpoint_10000/target.encoder.layers_2.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_2.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_2.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_2.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..ee7950ef21788347c8e38eac266f8e2c26860b2a --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_2.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7b6fcea835d8e0004364b4a17b101cad19ca58661dcf0536543fa5beb2a3b7 +size 5712 diff --git 
a/checkpoint_10000/target.encoder.layers_3.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_3.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_3.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_3.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..27f5dfcb5f88f66dfb65b369f58e68a08a12c94b --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4045339538d60d7e25f85248a13afd08f73fdb874f862933ab263222fe9c76cd +size 4396592 diff --git a/checkpoint_10000/target.encoder.layers_3.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_3.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_3.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_3.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..1f224f017c671816d1bd8765392d3e956cbd4f89 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c376510719aa256d5ef351dab11f0742a80dcb5dec1b420f6d7d2da3b5cdee +size 4387073 diff --git a/checkpoint_10000/target.encoder.layers_3.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_3.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_3.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_3.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..736437539517b9c4d977f436ba97d9519460c005 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bc967fab25468e663188487895598d0bb8f412551c204c951f450dbb161d6a9 +size 4385514 diff --git a/checkpoint_10000/target.encoder.layers_3.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_3.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git 
a/checkpoint_10000/target.encoder.layers_3.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_3.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..1a6363de8b1a9a48ae3cde2519f2085811e5cb35 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20289745bb25e5331fefd59ba37fad434fdb8a462b063a49583603e0927538b4 +size 4395974 diff --git a/checkpoint_10000/target.encoder.layers_3.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_3.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_3.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_3.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..f4f0d0fdca5adfe4cf8a0f782dd0b16a78926fc6 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f835496768356101f4983af0d473dc93f889014381afc80dab6c830384b25e +size 22694092 diff --git a/checkpoint_10000/target.encoder.layers_3.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_3.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_3.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_3.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..f45e2692eef3f8541205a292268120807520d769 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9854b5bbdab78faab8b03154ba32212f2fa244425a8149cf368d6b22782b9fe3 +size 22706088 diff --git a/checkpoint_10000/target.encoder.layers_3.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_3.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_3.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_3.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..504da2d7199ec94c738ead5847a2a353f875ec5d --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6779d5640335273e8f0c013083abc9730d675a6f54324488f5c45ff14e13f1aa +size 22682891 diff --git a/checkpoint_10000/target.encoder.layers_3.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_3.pre_attention_layer_norm.scale/.zarray 
new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_3.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_3.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..095dfdc10003cf924c4419c66822784e1a3fa4c9 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c954c5d68d44760e80b7bc4810b18509bc2c2befbaf069900614157caaf0101 +size 5723 diff --git a/checkpoint_10000/target.encoder.layers_3.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_3.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_3.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_3.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..871f31fbe9675fa14f63c0791994184d1d8fd6f5 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_3.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447be92b41a63aca92ff762cf1695ecf31f5d92da7ce2188d022fabba2334746 +size 5676 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_4.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_4.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..8c163fb00a595ff34127f12949d9c4d386f434c0 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d0edf34392ee93c008dffab78ae35f329b56adb3a86c16e48d758338599032 +size 4396713 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_4.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_4.attention.out.kernel/0.0 new file mode 100644 index 
0000000000000000000000000000000000000000..17493c896c3712cb65527249e85d92e520be0f10 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d56bf76ec9364c2a4c98901883d68d4c0f166bf98557a6c9aee59dd47b63591 +size 4387140 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_4.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_4.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a6dee16312bcd57602ed3c6c6ee06deb9338f49d --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:131c99b2fa099c1045c631e667b37e595f8ccf38e1327ce779b9b68d5425c1f0 +size 4385305 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_4.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_4.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_4.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7f7793eaaaa22db2c9bd6c25fdc0a5a39eb4d84a --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5e25c1d09f00c40c3e5de12c06babfc29d23cda71b632d23b695e989bb7ffec +size 4396149 diff --git a/checkpoint_10000/target.encoder.layers_4.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_4.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_4.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_4.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a8da080049396d2a31e965173c5540b4582b7160 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ee87ff5ccac070f0eed13017809ff4d470e8b7e6fbbd858136c37fc482cd1e +size 22693804 diff --git a/checkpoint_10000/target.encoder.layers_4.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_4.mlp.wi_1.kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_4.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_4.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..78bd7d19e5de6005392e9c81db423b8abf10c02b --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0138ed8cc53f2cf6e4989a9486fce98d728bde3797c4aaa7b00eb4f0e377fa2a +size 22705775 diff --git a/checkpoint_10000/target.encoder.layers_4.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_4.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_4.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_4.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..b24b81bf868f70ebd87ee31a5b63c48c84a8d4be --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bad9f2b38b32df1f74f4a5df49f24c28575fc3262c1ceb9b159814ce450685c +size 22685909 diff --git a/checkpoint_10000/target.encoder.layers_4.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_4.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_4.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_4.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..cc1eece8fe0dc77c1069f408d25e7050fc377a5e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98640c61e11c32442f0ebc0bc6df9a6f7362321b63883c7cd9ac58510974b22 +size 5711 diff --git a/checkpoint_10000/target.encoder.layers_4.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_4.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_4.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_4.pre_mlp_layer_norm.scale/0 new file mode 100644 index 
0000000000000000000000000000000000000000..46f5cb54a651355f6768515cd37bc6c98a9d6e4c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_4.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f61ad124e293dfec8b97c567c654eaadc6bbd31f86ce871f24fb95b3791e50 +size 5679 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_5.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_5.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..b391ebd312118726c3f28df983180a1a1c79c653 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be2920bf6722d9598f6204543ebde416ec3646294d7d49638fd94b133cb8ebd +size 4396393 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_5.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_5.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..26b8fadf7964ff2055974ebe8283393a81f728f7 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f398e479d5ec2a594df5ca007e41a2335ee67fff39422df0555f21a2e380f8 +size 4386531 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_5.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_5.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..248aa01ff9cfc1633593a17a84bc3620c5f088b7 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebcd1b5d543ffa9cac16c9034e0f5f6e16dc9a3e64cd666291cf4beba6ef9a1d +size 4385220 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_5.attention.value.kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_5.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_5.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..2709e6ae07b67841d9ec5f46db09f47bd8007ffd --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6968f848267d8f7c86d026ad54086521da24bd890e59a00f0c1a03ad386b4734 +size 4395966 diff --git a/checkpoint_10000/target.encoder.layers_5.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_5.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_5.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_5.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c0fb880dcedb3d1c29668fb8be581260b7b92386 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17aeab55d78c38b295257eee1f918f5e5e011a79a39b7555635893ec8aa3ae20 +size 22692323 diff --git a/checkpoint_10000/target.encoder.layers_5.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_5.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_5.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_5.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e3326564b9551a095534e10313dcdc8e36e27500 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78c76abf5752f7aab887692e135f14c6e6ad6c68d269d55943c80fd11455767 +size 22703331 diff --git a/checkpoint_10000/target.encoder.layers_5.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_5.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_5.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_5.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7a8a79f65621d45949e73c66a37cbd533153c2db --- /dev/null +++ 
b/checkpoint_10000/target.encoder.layers_5.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e47c7795bb6d2cc7e033718300dc53471a277f8608452a51a587f0abb3fc585 +size 22689800 diff --git a/checkpoint_10000/target.encoder.layers_5.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_5.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_5.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_5.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..1942843f92959bd8840eae926bfc9b682f2183a2 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644ec17e23484c40f4a2d7b6a5e4f05a54e4c88b0855d17f2a4ab00cebcbc42c +size 5729 diff --git a/checkpoint_10000/target.encoder.layers_5.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_5.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_5.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_5.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..099716cc0b11a9cca6987f7e241f892d1a34799c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_5.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137a1aa965862cc48bd8226eb627ce6787bd00e1624de048d06a420822c1aabc +size 5660 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_6.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_6.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..4cfb94c10ce038bf98a3cdfe0514b54f7cd4b1da --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399a8dc40dc55a0dbfe5938068955a9ade2496b97ed38dadde8091ed9ef8a20e +size 4396159 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_6.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ 
b/checkpoint_10000/target.encoder.layers_6.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_6.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..78a2fd12a3b4ae1615dac2b71b3bb10d588a9a20 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ff314424f8f40d8039d2be6ccee8f3576a35e0ddae2d7028f99893b9e048c0 +size 4386243 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_6.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_6.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..0e183360aba74209ff99e01b96ef6ec0441a7065 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c854b3e995955ed0b8cdca030772c0693a5f5c9522c3774f5446955da69a41a +size 4385508 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_6.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_6.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_6.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..6982d4e508e7ea33659d785437fd3884e4f77d09 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925ac45cecbf49e4dd0a24da12b8c9be0b6b0bf2c85bf5ef08613309869e8d79 +size 4396466 diff --git a/checkpoint_10000/target.encoder.layers_6.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_6.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_6.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_6.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..d416d1e0f07b11c7c71458691a68d2d2f7b8f4fc --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.mlp.wi_0.kernel/0.0 @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7fb48ec3c377e8f4659729c5f748e9ad77a236a1d2df9798d3d1351c9f9ac38 +size 22692123 diff --git a/checkpoint_10000/target.encoder.layers_6.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_6.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_6.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_6.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..f0a7af87e210875c72e49fe1163a15e6d930b02e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73bc43a5933c1c4222b1af4071516a5f7636724651db99424edcf6f73ebd462c +size 22703392 diff --git a/checkpoint_10000/target.encoder.layers_6.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_6.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_6.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_6.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..4ef66d32b7dc2183ec0fdd55734d16b57009e630 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcb73f2ad3703be2f18b9de5e2c8148a2072549ba768390f8aa369e30700f991 +size 22689165 diff --git a/checkpoint_10000/target.encoder.layers_6.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_6.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_6.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_6.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..6b1aaeeebcc545f145ea06fa49beea29cec306c3 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c36d78362e08b8261e908b26092b24c8bc1a7ca4e33db12d40184ac6034dc9c1 +size 5724 diff --git a/checkpoint_10000/target.encoder.layers_6.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_6.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_6.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_6.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..0b12c0fe98284aa192c1b71ec373b524ab137ff9 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_6.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff72a050f5919d0892bd18edd4e1e7a84bc0829af8f7544e33402f9f1f7c03f3 +size 5635 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_7.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_7.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..38e6e0de7795aaafc222f53cf7771ab20aaa07de --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51b311379092d32d9d1d00ce4cafd40f0ce2f5de2f2738b0a4a0e7df8520d33 +size 4396737 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_7.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_7.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c1716850e19a4233aa8e71fc6f0c85ad563c5d7b --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19655c21b421d260abdfc5aae00344e056bf2075dae6c2dd5c07f0364f4fa5f +size 4385756 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_7.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_7.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..f22e3a5b57b9d5c28db1c05f9bc03ca753537dfc --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:828fa27111a7e8ce327763ca613cd2111340dd6abc352ba251c6f6ec9a9765fc +size 4384961 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_7.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_7.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_7.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..fd56ad397fb83f0d4e55c9f68ea73ef7508909f6 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f61154673c7e8e8cde3832ef61b538e9c139a3ffe6b144bdd31be7b68f8010 +size 4396693 diff --git a/checkpoint_10000/target.encoder.layers_7.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_7.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_7.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_7.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..77330b5038e8d2f29b30a5bd7f775ebbded4106d --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eab918d8100197f5bdcbabb234412ca2cc02fbc1054a03ce8470a96522dc5c2 +size 22691024 diff --git a/checkpoint_10000/target.encoder.layers_7.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_7.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_7.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_7.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a54483e01ec468cfeb448a97033ebe04aa7c8fc2 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:550d3c5904b820e61f280b9d8be3759de44a156a8a228e11ce60d303d4ad4699 +size 22701371 diff --git a/checkpoint_10000/target.encoder.layers_7.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_7.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git 
a/checkpoint_10000/target.encoder.layers_7.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_7.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..4058cb50ef2ba5046d0682ca9ac2432ac5de3791 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a23f693a24ca79e704151d20ef5c363d371364519591cfda0943d2b2f43f57f +size 22696124 diff --git a/checkpoint_10000/target.encoder.layers_7.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_7.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_7.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_7.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..02ef6475f1ab87d4cd7c7fe34028b4e9840a29c4 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6620f3e28fd98801d57130ba87231482afc2829962d792abc0b4c193182de208 +size 5686 diff --git a/checkpoint_10000/target.encoder.layers_7.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_7.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_7.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_7.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..f14fff11138b5e612d56c71c17cbf63b6749bbb1 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_7.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2822d6a428049897ce685a8eeb53442bf6827b3f974b5f044647af2e6fed0ce9 +size 5627 diff --git a/checkpoint_10000/target.encoder.layers_8.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_8.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_8.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_8.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..1f7d526a59ed5bf80faa3352666196add1a8ecd5 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afaa3212ed932df68e2b8e050c2f67619264b0076297aec7c57c5e17aad2349a +size 4396252 diff --git 
a/checkpoint_10000/target.encoder.layers_8.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_8.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_8.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_8.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..fc8af6f682a130a4f24fb5a41b4dcacac4d1f4ab --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e88072b65da86156e5383a6e4cca424770b36509f1b6585b3e3235b7ad21d2 +size 4386233 diff --git a/checkpoint_10000/target.encoder.layers_8.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_8.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_8.attention.query.kernel/0.0 b/checkpoint_10000/target.encoder.layers_8.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5287265ed24044a1335ad9b8aba4beea4289d701 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb927842ea5477f84609fbde03c9b45f38fb37e30d7b67966f27af6aef7bb0f9 +size 4385463 diff --git a/checkpoint_10000/target.encoder.layers_8.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_8.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_8.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_8.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..fd58fed13e4622e776bb3d3550ec6eb3620574fe --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42583550a151b95f41272ad27d16b7ffab732706330bdf1fcc6aed6528a97b61 +size 4396364 diff --git a/checkpoint_10000/target.encoder.layers_8.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_8.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git 
a/checkpoint_10000/target.encoder.layers_8.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_8.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..b9aedded09e83a122eadadd489cda33fbd943ce2 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87e0594e317626fa876bd00133521dee7c8ef2022c56971bccc37daf5441d578 +size 22690083 diff --git a/checkpoint_10000/target.encoder.layers_8.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_8.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_8.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_8.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..9240bccbf37ba3d91816b775b461c421e38d8665 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f1a0e9c521af93cce5dcdab3f03c7c369e040bbd5b73cc7d1572299bbbf922 +size 22699980 diff --git a/checkpoint_10000/target.encoder.layers_8.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_8.mlp.wo.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_8.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_8.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..fb8f11a9a24e0808ba6a0a4b560948359057c1dc --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca880db0c24d9a514132ecb1545a6e1efe1788600f82eaaa9645ecb2c5f050bf +size 22695035 diff --git a/checkpoint_10000/target.encoder.layers_8.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_8.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_8.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_8.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..e6145e5646035718ec2a68f1f896a4d93bc9fb45 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2bcca69ecc11dd23065855b0ca100ec6d4028e5df9ec63f799927a72eb787a +size 5730 diff --git a/checkpoint_10000/target.encoder.layers_8.pre_mlp_layer_norm.scale/.zarray 
b/checkpoint_10000/target.encoder.layers_8.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_8.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_8.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..914655cbfcdb7c613d02187124acb489fd36354e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_8.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aaf402d8d9b5b03d7e720455211227c1cc642f788ce04a9ad00bdb6de239189 +size 5622 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.key.kernel/.zarray b/checkpoint_10000/target.encoder.layers_9.attention.key.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.key.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.key.kernel/0.0 b/checkpoint_10000/target.encoder.layers_9.attention.key.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7868966beee1f70069926850b2c9e3e9d6efca29 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.key.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70279432072bdea36ed82ee7798f44ddb61572a874dbea22e90f98adddd68b8b +size 4396452 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.out.kernel/.zarray b/checkpoint_10000/target.encoder.layers_9.attention.out.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..120376f0f1c75a027343e7502f698ea154b8203e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.out.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44605053a4b2d28515bc45d11e69af731efe343126253bbb183fcfd595a20b0b +size 178 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.out.kernel/0.0 b/checkpoint_10000/target.encoder.layers_9.attention.out.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..a72da0211f722331d3117a95ee11fe838308555c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.out.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2621e581ec68ab0c74970ee952d4e97fecdaf07f5b66ba252c178899e3ea7568 +size 4386248 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.query.kernel/.zarray b/checkpoint_10000/target.encoder.layers_9.attention.query.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.query.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.query.kernel/0.0 
b/checkpoint_10000/target.encoder.layers_9.attention.query.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..c051bae6cd0c40ffc909c54ddd78cfe38c0bd9d3 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.query.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bdc3f1cd8c0b8decf6e4ccabc25309ce04d9b51d088cf2ea64a4a0231118e47 +size 4384514 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.value.kernel/.zarray b/checkpoint_10000/target.encoder.layers_9.attention.value.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..323c73892f1f7a7707d2132576a926292d5428ec --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.value.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a7f99dd4ddeefb7ac17eca3753bcddd9ca6e3b953f538812b3f1ada2367075 +size 178 diff --git a/checkpoint_10000/target.encoder.layers_9.attention.value.kernel/0.0 b/checkpoint_10000/target.encoder.layers_9.attention.value.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..7d49d72f6dc6d777e9ff204d6e85a601b0b6b7d1 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.attention.value.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7eb46e8a725faedd11528b20b2aac54b79bed36af6b03bbb66ac1f902e4aa8 +size 4396373 diff --git a/checkpoint_10000/target.encoder.layers_9.mlp.wi_0.kernel/.zarray b/checkpoint_10000/target.encoder.layers_9.mlp.wi_0.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.mlp.wi_0.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_9.mlp.wi_0.kernel/0.0 b/checkpoint_10000/target.encoder.layers_9.mlp.wi_0.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..5f2ba1a16e833a1414374bb71d628204997d3219 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.mlp.wi_0.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e59f340178b342b17d19d421c2a8e44e2fefa8db5b5ef3d93fd2a55411cf028 +size 22689684 diff --git a/checkpoint_10000/target.encoder.layers_9.mlp.wi_1.kernel/.zarray b/checkpoint_10000/target.encoder.layers_9.mlp.wi_1.kernel/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f4d3ca6a1157de47afc225abfd2510f2cdb7ae2e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.mlp.wi_1.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cb78978e5bca2290d248f82714b3aea40f904254130ec231bc6e07def6ccdf +size 180 diff --git a/checkpoint_10000/target.encoder.layers_9.mlp.wi_1.kernel/0.0 b/checkpoint_10000/target.encoder.layers_9.mlp.wi_1.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..0ea95ce844fd73eed99e3a3ddda7193bcac838a8 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.mlp.wi_1.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd334e05c5bf68c6e93417fa8dc9d0a65aa7a30309199b8cd4566b2c479bef8 +size 22699267 diff --git a/checkpoint_10000/target.encoder.layers_9.mlp.wo.kernel/.zarray b/checkpoint_10000/target.encoder.layers_9.mlp.wo.kernel/.zarray new file mode 100644 index 
0000000000000000000000000000000000000000..ee708e4e956632ad238f9d97ec32a9fb108bec46 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.mlp.wo.kernel/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ae155dfdfb8a9700e2a43a95cf96febfc4df5aeb3ea32aae29dde773629c2d +size 180 diff --git a/checkpoint_10000/target.encoder.layers_9.mlp.wo.kernel/0.0 b/checkpoint_10000/target.encoder.layers_9.mlp.wo.kernel/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..e3e3be16d6438234107202b70b7b859e82c93b59 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.mlp.wo.kernel/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52dc3acf9ddcff3eadad2359ec3451f3d1a44ce477f9f6639b74f1e655e3e48f +size 22697702 diff --git a/checkpoint_10000/target.encoder.layers_9.pre_attention_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_9.pre_attention_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.pre_attention_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_9.pre_attention_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_9.pre_attention_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..0597ed3b0e82a4e138a44efacf1d1102ec20736c --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.pre_attention_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc3eca46a6d9a4d1069135d8d9dedd840323214ac87268fd1749ebc1432d302d +size 5696 diff --git a/checkpoint_10000/target.encoder.layers_9.pre_mlp_layer_norm.scale/.zarray b/checkpoint_10000/target.encoder.layers_9.pre_mlp_layer_norm.scale/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..30bba180531f2e2608776cf531b0f327534ab60e --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.pre_mlp_layer_norm.scale/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc +size 170 diff --git a/checkpoint_10000/target.encoder.layers_9.pre_mlp_layer_norm.scale/0 b/checkpoint_10000/target.encoder.layers_9.pre_mlp_layer_norm.scale/0 new file mode 100644 index 0000000000000000000000000000000000000000..c1cedd9aa086c92d7d7178f32bfc635b86174cb1 --- /dev/null +++ b/checkpoint_10000/target.encoder.layers_9.pre_mlp_layer_norm.scale/0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7208eb63a8cf23f2a0763d6d5be7380d6b5fd82fe1c0556283e490b3c373809a +size 5618 diff --git a/checkpoint_10000/target.encoder.relpos_bias.rel_embedding/.zarray b/checkpoint_10000/target.encoder.relpos_bias.rel_embedding/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..f0ebb438b0df77a995f437c9cf19936cd0a38f79 --- /dev/null +++ b/checkpoint_10000/target.encoder.relpos_bias.rel_embedding/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7028b3f2007ef72746d64487314ca6723cab7f9713ecb30bd30b73a22418e831 +size 172 diff --git a/checkpoint_10000/target.encoder.relpos_bias.rel_embedding/0.0 b/checkpoint_10000/target.encoder.relpos_bias.rel_embedding/0.0 new file mode 100644 index 
0000000000000000000000000000000000000000..c4c38e697f6cd4d343d4c63ad1e8b174788a65b1 --- /dev/null +++ b/checkpoint_10000/target.encoder.relpos_bias.rel_embedding/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e4b5148331891f25c63ce0f73d5396a1cae7d4d90cf8b07191d710e33bfcb71 +size 1498 diff --git a/checkpoint_10000/target.token_embedder.embedding/.zarray b/checkpoint_10000/target.token_embedder.embedding/.zarray new file mode 100644 index 0000000000000000000000000000000000000000..83afa8bef0af9940b11d0031253ab64925482c19 --- /dev/null +++ b/checkpoint_10000/target.token_embedder.embedding/.zarray @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:390d9ca46c6f427590b6104d24c3004f0940f37d6b116bdfc378c8945b375aaf +size 178 diff --git a/checkpoint_10000/target.token_embedder.embedding/0.0 b/checkpoint_10000/target.token_embedder.embedding/0.0 new file mode 100644 index 0000000000000000000000000000000000000000..9665c88918847eeb36f34645168a6b99e7266b08 --- /dev/null +++ b/checkpoint_10000/target.token_embedder.embedding/0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dafce7cb81900007a29ad3b12920af80ced227716b5b1cb0900cae26e233e655 +size 2193696 diff --git a/config.gin b/config.gin new file mode 100644 index 0000000000000000000000000000000000000000..6e4f2df59a40c1a1044f421f5d35bdce6fa9567a --- /dev/null +++ b/config.gin @@ -0,0 +1,141 @@ +from __gin__ import dynamic_registration +import __main__ as train_script +import seqio +from t5x import adafactor +from t5x.examples.t5 import network +from t5x import gin_utils +from t5x import models +from t5x import partitioning +from t5x import trainer +from t5x import utils +import tasks + +# Macros: +# ============================================================================== +BATCH_SIZE = 256 +DROPOUT_RATE = 0.0 +LABEL_SMOOTHING = 0.0 +LOSS_NORMALIZING_FACTOR = None +MIXTURE_OR_TASK_MODULE = None +MIXTURE_OR_TASK_NAME = 'byt5_pretrain_finnish' +MODEL = @models.EncoderDecoderModel() +MODEL_DIR = '/researchdisk/byt5-base-finnish' +OPTIMIZER = @adafactor.Adafactor() +RANDOM_SEED = None +SHUFFLE_TRAIN_EXAMPLES = True +TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512} +TRAIN_STEPS = 1000000 +USE_CACHED_TASKS = False +USE_HARDWARE_RNG = False +VOCABULARY = @seqio.ByteVocabulary() +Z_LOSS = 0.0001 + +# Parameters for adafactor.Adafactor: +# ============================================================================== +adafactor.Adafactor.decay_rate = 0.8 +adafactor.Adafactor.logical_factor_rules = \ + @adafactor.standard_logical_factor_rules() +adafactor.Adafactor.step_offset = 0 + +# Parameters for utils.CheckpointConfig: +# ============================================================================== +utils.CheckpointConfig.restore = @utils.RestoreCheckpointConfig() +utils.CheckpointConfig.save = @utils.SaveCheckpointConfig() + +# Parameters for utils.create_learning_rate_scheduler: +# ============================================================================== +utils.create_learning_rate_scheduler.base_learning_rate = 1.0 +utils.create_learning_rate_scheduler.factors = 'constant * rsqrt_decay' +utils.create_learning_rate_scheduler.warmup_steps = 10000 + +# Parameters for train/utils.DatasetConfig: +# ============================================================================== +train/utils.DatasetConfig.batch_size = %BATCH_SIZE +train/utils.DatasetConfig.mixture_or_task_name = %MIXTURE_OR_TASK_NAME +train/utils.DatasetConfig.module = %MIXTURE_OR_TASK_MODULE 
+train/utils.DatasetConfig.pack = True +train/utils.DatasetConfig.seed = None +train/utils.DatasetConfig.shuffle = %SHUFFLE_TRAIN_EXAMPLES +train/utils.DatasetConfig.split = 'train' +train/utils.DatasetConfig.task_feature_lengths = %TASK_FEATURE_LENGTHS +train/utils.DatasetConfig.use_cached = %USE_CACHED_TASKS + +# Parameters for train_eval/utils.DatasetConfig: +# ============================================================================== +train_eval/utils.DatasetConfig.batch_size = %BATCH_SIZE +train_eval/utils.DatasetConfig.mixture_or_task_name = %MIXTURE_OR_TASK_NAME +train_eval/utils.DatasetConfig.module = %MIXTURE_OR_TASK_MODULE +train_eval/utils.DatasetConfig.pack = True +train_eval/utils.DatasetConfig.seed = 42 +train_eval/utils.DatasetConfig.shuffle = False +train_eval/utils.DatasetConfig.split = 'validation' +train_eval/utils.DatasetConfig.task_feature_lengths = %TASK_FEATURE_LENGTHS +train_eval/utils.DatasetConfig.use_cached = %USE_CACHED_TASKS + +# Parameters for models.EncoderDecoderModel: +# ============================================================================== +models.EncoderDecoderModel.input_vocabulary = %VOCABULARY +models.EncoderDecoderModel.label_smoothing = %LABEL_SMOOTHING +models.EncoderDecoderModel.loss_normalizing_factor = %LOSS_NORMALIZING_FACTOR +models.EncoderDecoderModel.module = @network.Transformer() +models.EncoderDecoderModel.optimizer_def = %OPTIMIZER +models.EncoderDecoderModel.output_vocabulary = %VOCABULARY +models.EncoderDecoderModel.z_loss = %Z_LOSS + +# Parameters for partitioning.PjitPartitioner: +# ============================================================================== +partitioning.PjitPartitioner.logical_axis_rules = \ + @partitioning.standard_logical_axis_rules() +partitioning.PjitPartitioner.model_parallel_submesh = None +partitioning.PjitPartitioner.num_partitions = 1 + +# Parameters for utils.RestoreCheckpointConfig: +# ============================================================================== +utils.RestoreCheckpointConfig.path = [] + +# Parameters for utils.SaveCheckpointConfig: +# ============================================================================== +utils.SaveCheckpointConfig.dtype = 'float32' +utils.SaveCheckpointConfig.keep = 10 +utils.SaveCheckpointConfig.period = 10000 +utils.SaveCheckpointConfig.save_dataset = False + +# Parameters for network.T5Config: +# ============================================================================== +network.T5Config.dropout_rate = %DROPOUT_RATE +network.T5Config.dtype = 'bfloat16' +network.T5Config.emb_dim = 1536 +network.T5Config.head_dim = 64 +network.T5Config.logits_via_embedding = False +network.T5Config.mlp_activations = ('gelu', 'linear') +network.T5Config.mlp_dim = 3968 +network.T5Config.num_decoder_layers = 6 +network.T5Config.num_encoder_layers = 18 +network.T5Config.num_heads = 12 +network.T5Config.vocab_size = 384 + +# Parameters for train_script.train: +# ============================================================================== +train_script.train.checkpoint_cfg = @utils.CheckpointConfig() +train_script.train.eval_period = 10000 +train_script.train.eval_steps = 20 +train_script.train.infer_eval_dataset_cfg = None +train_script.train.model = %MODEL +train_script.train.model_dir = %MODEL_DIR +train_script.train.partitioner = @partitioning.PjitPartitioner() +train_script.train.random_seed = %RANDOM_SEED +train_script.train.summarize_config_fn = @gin_utils.summarize_gin_config +train_script.train.total_steps = %TRAIN_STEPS 
+train_script.train.train_dataset_cfg = @train/utils.DatasetConfig() +train_script.train.train_eval_dataset_cfg = @train_eval/utils.DatasetConfig() +train_script.train.trainer_cls = @trainer.Trainer +train_script.train.use_hardware_rng = %USE_HARDWARE_RNG + +# Parameters for trainer.Trainer: +# ============================================================================== +trainer.Trainer.learning_rate_fn = @utils.create_learning_rate_scheduler() +trainer.Trainer.num_microbatches = None + +# Parameters for network.Transformer: +# ============================================================================== +network.Transformer.config = @network.T5Config() diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1771314e5b52778f361aae33c9899dec1750612 --- /dev/null +++ b/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 3968, + "d_kv": 64, + "d_model": 1536, + "decoder_start_token_id": 0, + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "gradient_checkpointing": false, + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 6, + "num_heads": 12, + "num_layers": 18, + "output_past": true, + "pad_token_id": 0, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "ByT5Tokenizer", + "use_cache": true, + "vocab_size": 384 + } \ No newline at end of file diff --git a/convert_t5x_checkpoint_to_flax.py b/convert_t5x_checkpoint_to_flax.py new file mode 100644 index 0000000000000000000000000000000000000000..8f4f9c2cb1a8e5a3e85ded0cd8fa219ac1941793 --- /dev/null +++ b/convert_t5x_checkpoint_to_flax.py @@ -0,0 +1,157 @@ +# https://gist.github.com/stefan-it/30e4998ef159f33696e377a46f699d9f + +import argparse + +from t5x import checkpoints +from transformers import T5Config, FlaxT5ForConditionalGeneration + + +def convert_t5x_checkpoint_to_flax(t5x_checkpoint_path, config_name, flax_dump_folder_path): + config = T5Config.from_pretrained(config_name) + flax_model = FlaxT5ForConditionalGeneration(config=config) + t5x_model = checkpoints.load_t5x_checkpoint(t5x_checkpoint_path) + + split_mlp_wi = "wi_0" in t5x_model["target"]["encoder"]["layers_0"]["mlp"] + + # Encoder + for layer_index in range(config.num_layers): + layer_name = f"layers_{str(layer_index)}" + + # Self-Attention + t5x_attention_key = t5x_model["target"]["encoder"][layer_name]["attention"]["key"]["kernel"] + t5x_attention_out = t5x_model["target"]["encoder"][layer_name]["attention"]["out"]["kernel"] + t5x_attention_query = t5x_model["target"]["encoder"][layer_name]["attention"]["query"]["kernel"] + t5x_attention_value = t5x_model["target"]["encoder"][layer_name]["attention"]["value"]["kernel"] + + ## Layer Normalization + t5x_attention_layer_norm = t5x_model["target"]["encoder"][layer_name]["pre_attention_layer_norm"]["scale"] + + if split_mlp_wi: + t5x_mlp_wi_0 = t5x_model["target"]["encoder"][layer_name]["mlp"]["wi_0"]["kernel"] + t5x_mlp_wi_1 = t5x_model["target"]["encoder"][layer_name]["mlp"]["wi_1"]["kernel"] + else: + t5x_mlp_wi = t5x_model["target"]["encoder"][layer_name]["mlp"]["wi"]["kernel"] + + t5x_mlp_wo = t5x_model["target"]["encoder"][layer_name]["mlp"]["wo"]["kernel"] + + ## Layer Normalization + t5x_mlp_layer_norm = t5x_model["target"]["encoder"][layer_name]["pre_mlp_layer_norm"]["scale"] + + # Assigning + 
flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = t5x_attention_key + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = t5x_attention_out + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = t5x_attention_query + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = t5x_attention_value + + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"]["weight"] = t5x_attention_layer_norm + + if split_mlp_wi: + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wi_0"]["kernel"] = t5x_mlp_wi_0 + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wi_1"]["kernel"] = t5x_mlp_wi_1 + else: + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wi"]["kernel"] = t5x_mlp_wi + + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["DenseReluDense"]["wo"]["kernel"] = t5x_mlp_wo + flax_model.params["encoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"]["weight"] = t5x_mlp_layer_norm + + # Only for layer 0: + t5x_encoder_rel_embedding = t5x_model["target"]["encoder"]["relpos_bias"]["rel_embedding"].T + flax_model.params["encoder"]["block"]["0"]["layer"]["0"]["SelfAttention"]["relative_attention_bias"]["embedding"] = t5x_encoder_rel_embedding + + # Assigning + t5x_encoder_norm = t5x_model["target"]["encoder"]["encoder_norm"]["scale"] + flax_model.params["encoder"]["final_layer_norm"]["weight"] = t5x_encoder_norm + + # Decoder + for layer_index in range(config.num_layers): + layer_name = f"layers_{str(layer_index)}" + + # Self-Attention + t5x_attention_key = t5x_model["target"]["decoder"][layer_name]["self_attention"]["key"]["kernel"] + t5x_attention_out = t5x_model["target"]["decoder"][layer_name]["self_attention"]["out"]["kernel"] + t5x_attention_query = t5x_model["target"]["decoder"][layer_name]["self_attention"]["query"]["kernel"] + t5x_attention_value = t5x_model["target"]["decoder"][layer_name]["self_attention"]["value"]["kernel"] + + ## Layer Normalization + t5x_pre_attention_layer_norm = t5x_model["target"]["decoder"][layer_name]["pre_self_attention_layer_norm"]["scale"] + + # Encoder-Decoder-Attention + t5x_enc_dec_attention_key = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["key"]["kernel"] + t5x_enc_dec_attention_out = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["out"]["kernel"] + t5x_enc_dec_attention_query = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["query"]["kernel"] + t5x_enc_dec_attention_value = t5x_model["target"]["decoder"][layer_name]["encoder_decoder_attention"]["value"]["kernel"] + + ## Layer Normalization + t5x_cross_layer_norm = t5x_model["target"]["decoder"][layer_name]["pre_cross_attention_layer_norm"]["scale"] + + # MLP + if split_mlp_wi: + t5x_mlp_wi_0 = t5x_model["target"]["decoder"][layer_name]["mlp"]["wi_0"]["kernel"] + t5x_mlp_wi_1 = t5x_model["target"]["decoder"][layer_name]["mlp"]["wi_1"]["kernel"] + else: + t5x_mlp_wi = t5x_model["target"]["decoder"][layer_name]["mlp"]["wi"]["kernel"] + + t5x_mlp_wo = t5x_model["target"]["decoder"][layer_name]["mlp"]["wo"]["kernel"] + + ## Layer Normalization + tx5_mlp_layer_norm = t5x_model["target"]["decoder"][layer_name]["pre_mlp_layer_norm"]["scale"] + + # Assigning + 
flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["k"]["kernel"] = t5x_attention_key + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["o"]["kernel"] = t5x_attention_out + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["q"]["kernel"] = t5x_attention_query + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["SelfAttention"]["v"]["kernel"] = t5x_attention_value + + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["0"]["layer_norm"]["weight"] = t5x_pre_attention_layer_norm + + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["k"]["kernel"] = t5x_enc_dec_attention_key + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["o"]["kernel"] = t5x_enc_dec_attention_out + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["q"]["kernel"] = t5x_enc_dec_attention_query + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["EncDecAttention"]["v"]["kernel"] = t5x_enc_dec_attention_value + + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["1"]["layer_norm"]["weight"] = t5x_cross_layer_norm + + if split_mlp_wi: + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wi_0"]["kernel"] = t5x_mlp_wi_0 + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wi_1"]["kernel"] = t5x_mlp_wi_1 + else: + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wi"]["kernel"] = t5x_mlp_wi + + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["DenseReluDense"]["wo"]["kernel"] = t5x_mlp_wo + + flax_model.params["decoder"]["block"][str(layer_index)]["layer"]["2"]["layer_norm"]["weight"] = tx5_mlp_layer_norm + + # Decoder Normalization + tx5_decoder_norm = t5x_model["target"]["decoder"]["decoder_norm"]["scale"] + flax_model.params["decoder"]["final_layer_norm"]["weight"] = tx5_decoder_norm + + # Only for layer 0: + t5x_decoder_rel_embedding = t5x_model["target"]["decoder"]["relpos_bias"]["rel_embedding"].T + flax_model.params["decoder"]["block"]["0"]["layer"]["0"]["SelfAttention"]["relative_attention_bias"]["embedding"] = t5x_decoder_rel_embedding + + # Token Embeddings + tx5_token_embeddings = t5x_model["target"]["token_embedder"]["embedding"] + flax_model.params["shared"]["embedding"] = tx5_token_embeddings + + # LM Head + flax_model.params["lm_head"]["kernel"] = t5x_model["target"]["decoder"]["logits_dense"]["kernel"] + + flax_model.save_pretrained(flax_dump_folder_path) + print("T5X Model was sucessfully converted!") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # Required parameters + parser.add_argument( + "--t5x_checkpoint_path", default=None, type=str, required=True, help="Path the TX5 checkpoint." + ) + parser.add_argument( + "--config_name", default=None, type=str, required=True, help="Config name of T5 model." + ) + parser.add_argument( + "--flax_dump_folder_path", default=None, type=str, required=True, help="Path to the output FLAX model." 
+ ) + args = parser.parse_args() + convert_t5x_checkpoint_to_flax(args.t5x_checkpoint_path, args.config_name, args.flax_dump_folder_path) + \ No newline at end of file diff --git a/flax_model_to_pytorch.py b/flax_model_to_pytorch.py new file mode 100755 index 0000000000000000000000000000000000000000..687a8285ee6a8c8403e3c43a179d1c17084bf35e --- /dev/null +++ b/flax_model_to_pytorch.py @@ -0,0 +1,27 @@ +from transformers import AutoModelForSeq2SeqLM, FlaxAutoModelForSeq2SeqLM, AutoTokenizer +import torch +import numpy as np +import jax +import jax.numpy as jnp + +def to_f32(t): + return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t) + +jax.config.update('jax_platform_name', 'cpu') +MODEL_PATH = "./" +model = FlaxAutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH) +model.params = to_f32(model.params) +model.save_pretrained(MODEL_PATH) + +pt_model = AutoModelForSeq2SeqLM.from_pretrained( + MODEL_PATH, from_flax=True).to('cpu') + +input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32) +input_ids_pt = torch.tensor(input_ids) + +logits_pt = pt_model(input_ids=input_ids_pt, decoder_input_ids=input_ids_pt).logits +print(logits_pt) +logits_fx = model(input_ids=input_ids, decoder_input_ids=input_ids).logits +print(logits_fx) + +pt_model.save_pretrained(MODEL_PATH) diff --git a/model-info.txt b/model-info.txt new file mode 100644 index 0000000000000000000000000000000000000000..12e304f5121e9412c00df69b7d1ea96030c96dea --- /dev/null +++ b/model-info.txt @@ -0,0 +1,1263 @@ +Variable decoder/decoder_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_0/encoder_decoder_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_0/encoder_decoder_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_0/encoder_decoder_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_0/encoder_decoder_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_0/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_0/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_0/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable decoder/layers_0/pre_cross_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_0/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_0/pre_self_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_0/self_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_0/self_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_0/self_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_0/self_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_1/encoder_decoder_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable 
decoder/layers_1/encoder_decoder_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_1/encoder_decoder_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_1/encoder_decoder_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_1/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_1/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_1/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable decoder/layers_1/pre_cross_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_1/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_1/pre_self_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_1/self_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_1/self_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_1/self_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_1/self_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_2/encoder_decoder_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_2/encoder_decoder_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_2/encoder_decoder_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_2/encoder_decoder_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_2/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_2/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_2/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable decoder/layers_2/pre_cross_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_2/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_2/pre_self_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_2/self_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_2/self_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_2/self_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_2/self_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_3/encoder_decoder_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') 
+Variable decoder/layers_3/encoder_decoder_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_3/encoder_decoder_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_3/encoder_decoder_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_3/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_3/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_3/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable decoder/layers_3/pre_cross_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_3/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_3/pre_self_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_3/self_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_3/self_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_3/self_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_3/self_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_4/encoder_decoder_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_4/encoder_decoder_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_4/encoder_decoder_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_4/encoder_decoder_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_4/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_4/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_4/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable decoder/layers_4/pre_cross_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_4/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_4/pre_self_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_4/self_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_4/self_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_4/self_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_4/self_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_5/encoder_decoder_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 
'model') +Variable decoder/layers_5/encoder_decoder_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_5/encoder_decoder_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_5/encoder_decoder_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_5/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_5/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable decoder/layers_5/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable decoder/layers_5/pre_cross_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_5/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_5/pre_self_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable decoder/layers_5/self_attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_5/self_attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable decoder/layers_5/self_attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/layers_5/self_attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable decoder/logits_dense/kernel size 589824 shape (embed=1536, vocab=384) partition spec (None, 'model') +Variable decoder/relpos_bias/rel_embedding size 384 shape (heads=12, relpos_buckets=32) partition spec ('model', None) +Variable encoder/encoder_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_0/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_0/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_0/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_0/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_0/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_0/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_0/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_0/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_0/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_1/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_1/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_1/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_1/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable 
encoder/layers_1/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_1/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_1/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_1/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_1/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_10/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_10/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_10/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_10/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_10/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_10/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_10/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_10/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_10/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_11/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_11/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_11/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_11/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_11/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_11/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_11/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_11/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_11/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_12/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_12/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_12/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_12/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_12/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_12/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_12/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec 
('model', None) +Variable encoder/layers_12/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_12/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_13/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_13/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_13/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_13/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_13/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_13/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_13/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_13/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_13/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_14/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_14/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_14/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_14/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_14/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_14/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_14/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_14/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_14/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_15/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_15/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_15/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_15/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_15/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_15/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_15/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_15/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_15/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_16/attention/key/kernel size 1179648 shape (embed=1536, 
joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_16/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_16/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_16/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_16/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_16/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_16/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_16/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_16/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_17/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_17/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_17/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_17/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_17/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_17/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_17/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_17/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_17/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_2/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_2/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_2/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_2/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_2/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_2/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_2/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_2/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_2/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_3/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_3/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_3/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable 
encoder/layers_3/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_3/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_3/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_3/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_3/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_3/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_4/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_4/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_4/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_4/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_4/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_4/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_4/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_4/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_4/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_5/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_5/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_5/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_5/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_5/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_5/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_5/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_5/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_5/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_6/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_6/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_6/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_6/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_6/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_6/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') 
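A brief aside on the numbers in this listing (illustrative only, not part of the logged output): every per-tensor size follows directly from the dimensions shown in the shapes, i.e. embed=1536, mlp=3968 and joined_kv=768. A minimal Python check of the sizes reported above:

    # Reproduce the per-tensor sizes listed for the encoder blocks.
    embed, mlp, joined_kv = 1536, 3968, 768
    assert embed * joined_kv == 1_179_648      # attention query/key/value/out kernels
    assert embed * mlp == 6_094_848            # mlp wi_0 / wi_1 / wo kernels
    per_encoder_layer = 4 * embed * joined_kv + 3 * embed * mlp + 2 * embed
    assert per_encoder_layer == 23_006_208     # parameters in one encoder block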
+Variable encoder/layers_6/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_6/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_6/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_7/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_7/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_7/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_7/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_7/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_7/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_7/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_7/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_7/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_8/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_8/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_8/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_8/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_8/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_8/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_8/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_8/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_8/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_9/attention/key/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_9/attention/out/kernel size 1179648 shape (joined_kv=768, embed=1536) partition spec ('model', None) +Variable encoder/layers_9/attention/query/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_9/attention/value/kernel size 1179648 shape (embed=1536, joined_kv=768) partition spec (None, 'model') +Variable encoder/layers_9/mlp/wi_0/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_9/mlp/wi_1/kernel size 6094848 shape (embed=1536, mlp=3968) partition spec (None, 'model') +Variable encoder/layers_9/mlp/wo/kernel size 6094848 shape (mlp=3968, embed=1536) partition spec ('model', None) +Variable encoder/layers_9/pre_attention_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable encoder/layers_9/pre_mlp_layer_norm/scale size 1536 shape (embed=1536) partition spec (None,) +Variable 
encoder/relpos_bias/rel_embedding size 384 shape (heads=12, relpos_buckets=32) partition spec ('model', None) +Variable token_embedder/embedding size 589824 shape (vocab=384, embed=1536) partition spec ('model', None) +Total number of parameters: 581653248 + +Variable param_states/decoder/decoder_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/decoder_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/decoder_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/decoder_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable 
param_states/decoder/layers_0/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_0/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/self_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_0/self_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/self_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_0/self_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/self_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_0/self_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_0/self_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_0/self_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable 
param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_1/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable 
param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/self_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_1/self_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/self_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_1/self_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/self_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_1/self_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_1/self_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_1/self_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable 
param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_2/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable 
param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/self_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/self_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/self_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/self_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/self_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_2/self_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_2/self_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_2/self_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable 
param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_3/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/key/kernel/v_col size 1536 shape (1536,) partition 
spec None +Variable param_states/decoder/layers_3/self_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/self_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/self_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/self_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/self_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_3/self_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_3/self_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_3/self_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable 
param_states/decoder/layers_4/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_4/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/self_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/self_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/self_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/self_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable 
param_states/decoder/layers_4/self_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/self_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_4/self_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_4/self_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_4/self_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None 
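The param_states entries above are the optimizer slots stored alongside each parameter; the m / v / v_col / v_row layout matches Adafactor-style factored second moments (the optimizer itself is not named in this listing, so this reading is an inference from the state names and sizes). For a 2-D kernel the second moment is kept as one vector per axis (v_row and v_col) and the unfactored v collapses to a size-1 placeholder, while 1-D parameters such as the layer-norm scales keep a full-length v; m is likewise a size-1 placeholder when no momentum is accumulated. A minimal, illustrative sketch of the resulting memory saving, using the wi_0 kernel shape from the listing:

    # Factored vs. unfactored second-moment storage for a (1536, 3968) kernel.
    shape = (1536, 3968)                 # e.g. mlp/wi_0/kernel (embed, mlp)
    unfactored = shape[0] * shape[1]     # 6,094,848 values for a full second-moment matrix
    factored = shape[0] + shape[1]       # 5,504 values: v_row (1536) + v_col (3968)
    print(unfactored, factored)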
+Variable param_states/decoder/layers_5/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/decoder/layers_5/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/self_attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/self_attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/self_attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/self_attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/layers_5/self_attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/layers_5/self_attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/layers_5/self_attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable 
param_states/decoder/layers_5/self_attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/decoder/logits_dense/kernel/m size 1 shape (1,) partition spec None +Variable param_states/decoder/logits_dense/kernel/v size 1 shape (1,) partition spec None +Variable param_states/decoder/logits_dense/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/decoder/logits_dense/kernel/v_row size 384 shape (384,) partition spec None +Variable param_states/decoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None +Variable param_states/decoder/relpos_bias/rel_embedding/v size 384 shape (heads=12, relpos_buckets=32) partition spec ('model', None) +Variable param_states/decoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None +Variable param_states/decoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/encoder_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/encoder_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/encoder_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/encoder_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_0/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_0/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_0/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_0/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_0/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_0/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_0/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_0/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_0/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_0/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_0/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_0/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_1/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_1/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_1/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_1/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_1/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_1/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_1/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_1/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_1/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_1/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_1/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_1/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_10/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_10/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_10/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_10/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_10/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_10/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable 
param_states/encoder/layers_10/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_10/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_10/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_11/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_11/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_11/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_11/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_11/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable 
param_states/encoder/layers_11/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_11/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_11/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_11/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_12/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_12/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_12/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_12/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_12/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_12/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_12/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_12/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_12/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_12/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_13/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_13/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_13/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_13/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_13/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_13/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_13/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_13/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_13/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_13/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable 
param_states/encoder/layers_14/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_14/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_14/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_14/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_14/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_14/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_14/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_14/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_14/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_15/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_15/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_15/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_15/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_15/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_15/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_15/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_15/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_15/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_15/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_16/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_16/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_16/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_16/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_16/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_16/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_16/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_16/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_16/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable 
param_states/encoder/layers_16/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_17/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_17/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_17/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_17/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_17/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_17/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_17/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_17/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable 
param_states/encoder/layers_17/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_2/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_2/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_2/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_2/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_2/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_2/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_2/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable 
param_states/encoder/layers_2/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_2/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_3/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_3/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_3/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_3/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_3/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_3/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_3/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_3/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_3/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_3/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_3/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_3/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_4/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_4/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_4/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_4/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_4/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_4/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_4/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_4/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_4/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_4/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_4/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_4/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_5/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_5/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_5/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_5/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_5/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_5/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable 
param_states/encoder/layers_5/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_5/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_5/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_6/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_6/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_6/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_6/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_6/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_6/attention/value/kernel/m 
size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_6/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_6/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_6/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_7/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_7/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_7/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_7/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable 
param_states/encoder/layers_7/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_7/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_7/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_7/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_7/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_7/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_8/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_8/attention/out/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/out/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None +Variable 
param_states/encoder/layers_8/attention/out/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_8/attention/query/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/query/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_8/attention/query/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_8/attention/value/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/value/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_8/attention/value/kernel/v_row size 768 shape (768,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_8/mlp/wo/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/mlp/wo/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None +Variable param_states/encoder/layers_8/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,) +Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_9/attention/key/kernel/m size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_9/attention/key/kernel/v size 1 shape (1,) partition spec None +Variable param_states/encoder/layers_9/attention/key/kernel/v_col size 1536 shape (1536,) partition spec None +Variable param_states/encoder/layers_9/attention/key/kernel/v_row size 768 shape (768,) partition spec None +Variable 
param_states/encoder/layers_9/attention/out/kernel/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/attention/out/kernel/v size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/attention/out/kernel/v_col size 1536 shape (1536,) partition spec None
+Variable param_states/encoder/layers_9/attention/out/kernel/v_row size 768 shape (768,) partition spec None
+Variable param_states/encoder/layers_9/attention/query/kernel/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/attention/query/kernel/v size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/attention/query/kernel/v_col size 1536 shape (1536,) partition spec None
+Variable param_states/encoder/layers_9/attention/query/kernel/v_row size 768 shape (768,) partition spec None
+Variable param_states/encoder/layers_9/attention/value/kernel/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/attention/value/kernel/v size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/attention/value/kernel/v_col size 1536 shape (1536,) partition spec None
+Variable param_states/encoder/layers_9/attention/value/kernel/v_row size 768 shape (768,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v_col size 3968 shape (3968,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v_row size 1536 shape (1536,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v_col size 3968 shape (3968,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v_row size 1536 shape (1536,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wo/kernel/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wo/kernel/v size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wo/kernel/v_col size 3968 shape (3968,) partition spec None
+Variable param_states/encoder/layers_9/mlp/wo/kernel/v_row size 1536 shape (1536,) partition spec None
+Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,)
+Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v size 1536 shape (embed=1536) partition spec (None,)
+Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None
+Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None
+Variable param_states/encoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None
+Variable param_states/encoder/relpos_bias/rel_embedding/v size 384 shape (heads=12, relpos_buckets=32) partition spec ('model', None)
+Variable param_states/encoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None
+Variable param_states/encoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None
+Variable param_states/token_embedder/embedding/m size 1 shape (1,) partition spec None
+Variable param_states/token_embedder/embedding/v size 1 shape (1,) partition spec None
+Variable param_states/token_embedder/embedding/v_col size 1536 shape (1536,) partition spec None
+Variable param_states/token_embedder/embedding/v_row size 384 shape (384,) partition spec None
+Variable step size 1 shape () partition spec None
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..ccd82cceace616a60eb54886cf3a051c53ec7e74
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1 @@
+{"eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>", "<extra_id_100>", "<extra_id_101>", "<extra_id_102>", "<extra_id_103>", "<extra_id_104>", "<extra_id_105>", "<extra_id_106>", "<extra_id_107>", "<extra_id_108>", "<extra_id_109>", "<extra_id_110>", "<extra_id_111>", "<extra_id_112>", "<extra_id_113>", "<extra_id_114>", "<extra_id_115>", "<extra_id_116>", "<extra_id_117>", "<extra_id_118>", "<extra_id_119>", "<extra_id_120>", "<extra_id_121>", "<extra_id_122>", "<extra_id_123>", "<extra_id_124>"]}
\ No newline at end of file
diff --git a/start_train.sh b/start_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..86a629f9e4149f52f5fa21d465dc4f582a30a971
--- /dev/null
+++ b/start_train.sh
@@ -0,0 +1,12 @@
+# set train hyperparams
+unset LD_PRELOAD
+
+PROJECT_DIR="/researchdisk/byt5-base-finnish"
+T5X_DIR=${HOME}"/t5x" # directory where the t5x is cloned.
+MODEL_DIR="/researchdisk/byt5-base-finnish"
+export PYTHONPATH=${PROJECT_DIR}
+
+python3 ${T5X_DIR}/t5x/train.py \
+  --gin_search_paths=${PROJECT_DIR} \
+  --gin_file="base_pretrain.gin" \
+  --gin.MODEL_DIR=\"${MODEL_DIR}\"
\ No newline at end of file
diff --git a/tasks.py b/tasks.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6b0e42aaf7a10f639a5788a118501b289a570e8
--- /dev/null
+++ b/tasks.py
@@ -0,0 +1,86 @@
+# adapted from https://huggingface.co/pere/pk-nb-t5x/blob/main/tasks.py
+
+import functools
+
+import seqio
+import tensorflow as tf
+import t5.data
+from datasets import load_dataset, load_from_disk
+from t5.data import postprocessors
+from t5.data import preprocessors
+from t5.evaluation import metrics
+from seqio import FunctionDataSource, utils
+
+# ======================= ByT5 =======================
+MEAN_NOISE_SPAN_LENGTH = 20
+vocabulary = seqio.ByteVocabulary()
+# ====================================================
+
+TaskRegistry = seqio.TaskRegistry
+
+DEFAULT_OUTPUT_FEATURES = {
+    "inputs": seqio.Feature(
+        vocabulary=vocabulary,
+        required=False),
+    "targets": seqio.Feature(
+        vocabulary=vocabulary)
+}
+
+
+def gen_dataset(split, shuffle=False, seed=None, column="text", dataset=None):
+    if shuffle:
+        if seed:
+            dataset = dataset.shuffle(seed=seed)
+        else:
+            dataset = dataset.shuffle()
+    while True:
+        for item in dataset[str(split)]:
+            yield item[column]
+
+
+def dataset_fn(split, shuffle_files, seed=None, dataset=None):
+    return tf.data.Dataset.from_generator(
+        functools.partial(gen_dataset, split, shuffle_files, seed, dataset=dataset),
+        output_signature=tf.TensorSpec(shape=(), dtype=tf.string, name=dataset_name)
+    )
+
+
+@utils.map_over_dataset
+def target_to_key(x, key_map, target_key):
+    """Assign the value from the dataset to target_key in key_map"""
+    return {**key_map, target_key: x}
+
+
+# Final pretraining task used in Raffel et al., 2019, adapted to our use
+dataset_name = "/researchdisk/lm_training_dataset_full"
+dataset_params = {"from_disk_path": dataset_name}
+
+if "from_disk_path" in dataset_params:
+    dataset = load_from_disk(dataset_params.get("from_disk_path"))
+else:
+    dataset = load_dataset(**dataset_params)
+
+dataset_shapes = {"train": dataset["train"].num_rows, "validation": dataset["validation"].num_rows}
+TaskRegistry.add(
+    "byt5_pretrain_finnish",
+    source=seqio.FunctionDataSource(
+        dataset_fn=functools.partial(dataset_fn, dataset=dataset),
+        splits=("train", "validation"),
+        caching_permitted=False,
+        num_input_examples=dataset_shapes,
+    ),
+    preprocessors=[
+        functools.partial(
+            target_to_key, key_map={
+                "inputs": None,
+                "targets": None,
+            }, target_key="targets"),
+        seqio.preprocessors.tokenize,
+        # seqio.CacheDatasetPlaceholder(),
+        functools.partial(preprocessors.span_corruption,
+                          mean_noise_span_length=MEAN_NOISE_SPAN_LENGTH),
+        seqio.preprocessors.append_eos_after_trim,
+    ],
+    output_features={"targets": DEFAULT_OUTPUT_FEATURES["targets"]},
+    metric_fns=[metrics.accuracy]
+)
\ No newline at end of file
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..512088e166d430a4d25c7b9c6d498a494a16d5b3
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1 @@
+{"eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "extra_ids": 125, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>", "<extra_id_100>", "<extra_id_101>", "<extra_id_102>", "<extra_id_103>", "<extra_id_104>", "<extra_id_105>", "<extra_id_106>", "<extra_id_107>", "<extra_id_108>", "<extra_id_109>", "<extra_id_110>", "<extra_id_111>", "<extra_id_112>", "<extra_id_113>", "<extra_id_114>", "<extra_id_115>", "<extra_id_116>", "<extra_id_117>", "<extra_id_118>", "<extra_id_119>", "<extra_id_120>", "<extra_id_121>", "<extra_id_122>", "<extra_id_123>", "<extra_id_124>"]}
\ No newline at end of file
diff --git a/train/events.out.tfevents.1651151362.t1v-n-9798b699-w-0.1022149.0.v2 b/train/events.out.tfevents.1651151362.t1v-n-9798b699-w-0.1022149.0.v2
new file mode 100644
index 0000000000000000000000000000000000000000..affc53d0a93ac71bf2f4a212896e82dc51deb2fa
--- /dev/null
+++ b/train/events.out.tfevents.1651151362.t1v-n-9798b699-w-0.1022149.0.v2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c00263b9938d60c6d3d8763e61c53ecf250953962dbc3754d8af47f415284620
+size 7414
diff --git a/training_eval/byt5_pretrain_finnish/events.out.tfevents.1651151362.t1v-n-9798b699-w-0.1022149.1.v2 b/training_eval/byt5_pretrain_finnish/events.out.tfevents.1651151362.t1v-n-9798b699-w-0.1022149.1.v2
new file mode 100644
index 0000000000000000000000000000000000000000..f7974de24a71320e8f65f22f22016b10097e6710
--- /dev/null
+++ b/training_eval/byt5_pretrain_finnish/events.out.tfevents.1651151362.t1v-n-9798b699-w-0.1022149.1.v2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6f993dad92dcc8a5fd99820bffd68cbcfeab864068393a6ea0923cbd79d2dd7
+size 1414