Add TGPT checkpoint
Browse files- checkpoint_29760/_CHECKPOINT_METADATA +1 -0
- checkpoint_29760/_METADATA +1 -0
- checkpoint_29760/_sharding +1 -0
- checkpoint_29760/array_metadatas/process_0 +1 -0
- checkpoint_29760/d/e3bc2af06cdf2f0397e6ef376a4170c1 +0 -0
- checkpoint_29760/manifest.ocdbt +0 -0
- checkpoint_29760/ocdbt.process_0/d/007ebc530249f4a7e697a549823ba7aa +0 -0
- checkpoint_29760/ocdbt.process_0/d/0435fabb3556a406e1fe330017c829d9 +0 -0
- checkpoint_29760/ocdbt.process_0/d/1222a19a42ab47b108835795b9a0c803 +0 -0
- checkpoint_29760/ocdbt.process_0/d/204736c1c49d2f32c811129bcb179905 +0 -0
- checkpoint_29760/ocdbt.process_0/d/2341f560acb9747fc83784ef70b63fa7 +0 -0
- checkpoint_29760/ocdbt.process_0/d/28404e7dcc9548c1a308ae65e54938a7 +0 -0
- checkpoint_29760/ocdbt.process_0/d/2aa58ee48f550d5e272a56492ba20a90 +0 -0
- checkpoint_29760/ocdbt.process_0/d/3ae445e867d7406c9482798cdf2a4d50 +0 -0
- checkpoint_29760/ocdbt.process_0/d/3d99b0183fcecf1766782be926c1cf83 +0 -0
- checkpoint_29760/ocdbt.process_0/d/4615740e84880ff890077b63c93b4ccf +0 -0
- checkpoint_29760/ocdbt.process_0/d/4d7a33651229d78c32da9d630117e04b +0 -0
- checkpoint_29760/ocdbt.process_0/d/573bd826d21e3cb76dfa64475510b1b3 +0 -0
- checkpoint_29760/ocdbt.process_0/d/5c51741152faf45376bbf8a6e991f966 +0 -0
- checkpoint_29760/ocdbt.process_0/d/5f3d29b80f2fb08df9940e3b3723e173 +0 -0
- checkpoint_29760/ocdbt.process_0/d/63a1543f5225b32c8974a5e978919fe4 +0 -0
- checkpoint_29760/ocdbt.process_0/d/640bbe82b19b0d599873f9f4bab6543f +0 -0
- checkpoint_29760/ocdbt.process_0/d/646140bd23749a65703e13bb61371037 +0 -0
- checkpoint_29760/ocdbt.process_0/d/6951ca1cf66a0cb66451e1d17af198ba +0 -0
- checkpoint_29760/ocdbt.process_0/d/69fb40086f78aeef78a3086c1b48fc65 +0 -0
- checkpoint_29760/ocdbt.process_0/d/6b71a602d9e8c6cdade99432a9812e70 +0 -0
- checkpoint_29760/ocdbt.process_0/d/6d68861d3b2200aa51e2e536c3860397 +0 -0
- checkpoint_29760/ocdbt.process_0/d/7c0bf0782c588385dc35a84e840f3495 +0 -0
- checkpoint_29760/ocdbt.process_0/d/81b6302e883bbfa676d547ad9ca5772c +0 -0
- checkpoint_29760/ocdbt.process_0/d/91dd4f162a27fcdc5399b47a45f1218b +0 -0
- checkpoint_29760/ocdbt.process_0/d/988323db7b4ce92c09d78c4d7cb7ef0e +0 -0
- checkpoint_29760/ocdbt.process_0/d/9aa96aaf46e7dca9aca5b851a700a530 +0 -0
- checkpoint_29760/ocdbt.process_0/d/a0739aad34a1dfcbe8bbb353f8f3c1a3 +0 -0
- checkpoint_29760/ocdbt.process_0/d/ab9502043fa886ae9be7cf67b6c60f3d +0 -0
- checkpoint_29760/ocdbt.process_0/d/ac4b1dcaada37bf3f2d86b884ab65960 +0 -0
- checkpoint_29760/ocdbt.process_0/d/b432c8656fbca78b2c8198cb1e061992 +0 -0
- checkpoint_29760/ocdbt.process_0/d/b4a833b5ba8c4f248ccafe7f36102102 +0 -0
- checkpoint_29760/ocdbt.process_0/d/ba7c61d442230c2e7ecfc26a56dd82bb +0 -0
- checkpoint_29760/ocdbt.process_0/d/bc1d22ff07b62538de250b068e3f4f21 +0 -0
- checkpoint_29760/ocdbt.process_0/d/be4f8fbda5a1209e26e16ef7cb526f28 +0 -0
- checkpoint_29760/ocdbt.process_0/d/c3c090c9be044d9708b61feb3de61af2 +0 -0
- checkpoint_29760/ocdbt.process_0/d/cfcbf9756b7f7cb2907d4282c74f4dc3 +0 -0
- checkpoint_29760/ocdbt.process_0/d/d6ed69df3c31be7b72db2d9770b570a3 +0 -0
- checkpoint_29760/ocdbt.process_0/d/fbef88da640e44233eef20f7c62a1421 +0 -0
- checkpoint_29760/ocdbt.process_0/manifest.ocdbt +0 -0
- meta.json +1 -1
checkpoint_29760/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758139235902924561, "commit_timestamp_nsecs": 1758139236181792555, "custom_metadata": {}}
|
checkpoint_29760/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('params', 'in_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'in_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'ln', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'ln', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'out_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('params', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('params', 'time_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'time_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '0')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "1", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '2')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "2", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('rng',)": {"key_metadata": [{"key": "rng", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
checkpoint_29760/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"b3B0X3N0YXRlLjEuMC5jb3VudA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","c3RlcA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cm5n":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
checkpoint_29760/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "rng", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}]}
|
checkpoint_29760/d/e3bc2af06cdf2f0397e6ef376a4170c1
ADDED
|
Binary file (25 kB). View file
|
|
|
checkpoint_29760/manifest.ocdbt
ADDED
|
Binary file (118 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/007ebc530249f4a7e697a549823ba7aa
ADDED
|
Binary file (610 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/0435fabb3556a406e1fe330017c829d9
ADDED
|
Binary file (674 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/1222a19a42ab47b108835795b9a0c803
ADDED
|
Binary file (614 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/204736c1c49d2f32c811129bcb179905
ADDED
|
Binary file (623 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/2341f560acb9747fc83784ef70b63fa7
ADDED
|
Binary file (652 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/28404e7dcc9548c1a308ae65e54938a7
ADDED
|
Binary file (657 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/2aa58ee48f550d5e272a56492ba20a90
ADDED
|
Binary file (611 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/3ae445e867d7406c9482798cdf2a4d50
ADDED
|
Binary file (486 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/3d99b0183fcecf1766782be926c1cf83
ADDED
|
Binary file (640 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/4615740e84880ff890077b63c93b4ccf
ADDED
|
Binary file (1.35 kB). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/4d7a33651229d78c32da9d630117e04b
ADDED
|
Binary file (632 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/573bd826d21e3cb76dfa64475510b1b3
ADDED
|
Binary file (660 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/5c51741152faf45376bbf8a6e991f966
ADDED
|
Binary file (659 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/5f3d29b80f2fb08df9940e3b3723e173
ADDED
|
Binary file (680 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/63a1543f5225b32c8974a5e978919fe4
ADDED
|
Binary file (51 kB). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/640bbe82b19b0d599873f9f4bab6543f
ADDED
|
Binary file (171 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/646140bd23749a65703e13bb61371037
ADDED
|
Binary file (610 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/6951ca1cf66a0cb66451e1d17af198ba
ADDED
|
Binary file (681 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/69fb40086f78aeef78a3086c1b48fc65
ADDED
|
Binary file (656 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/6b71a602d9e8c6cdade99432a9812e70
ADDED
|
Binary file (607 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/6d68861d3b2200aa51e2e536c3860397
ADDED
|
Binary file (697 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/7c0bf0782c588385dc35a84e840f3495
ADDED
|
Binary file (451 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/81b6302e883bbfa676d547ad9ca5772c
ADDED
|
Binary file (676 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/91dd4f162a27fcdc5399b47a45f1218b
ADDED
|
Binary file (13.3 kB). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/988323db7b4ce92c09d78c4d7cb7ef0e
ADDED
|
Binary file (671 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/9aa96aaf46e7dca9aca5b851a700a530
ADDED
|
Binary file (650 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/a0739aad34a1dfcbe8bbb353f8f3c1a3
ADDED
|
Binary file (682 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/ab9502043fa886ae9be7cf67b6c60f3d
ADDED
|
Binary file (658 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/ac4b1dcaada37bf3f2d86b884ab65960
ADDED
|
Binary file (671 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/b432c8656fbca78b2c8198cb1e061992
ADDED
|
Binary file (657 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/b4a833b5ba8c4f248ccafe7f36102102
ADDED
|
Binary file (599 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/ba7c61d442230c2e7ecfc26a56dd82bb
ADDED
|
Binary file (623 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/bc1d22ff07b62538de250b068e3f4f21
ADDED
|
Binary file (618 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/be4f8fbda5a1209e26e16ef7cb526f28
ADDED
|
Binary file (659 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/c3c090c9be044d9708b61feb3de61af2
ADDED
|
Binary file (629 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/cfcbf9756b7f7cb2907d4282c74f4dc3
ADDED
|
Binary file (38.1 kB). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/d6ed69df3c31be7b72db2d9770b570a3
ADDED
|
Binary file (611 Bytes). View file
|
|
|
checkpoint_29760/ocdbt.process_0/d/fbef88da640e44233eef20f7c62a1421
ADDED
|
Binary file (1.33 kB). View file
|
|
|
checkpoint_29760/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (352 Bytes). View file
|
|
|
meta.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"d_model": 16,
|
| 7 |
"n_heads": 1,
|
| 8 |
"depth": 2,
|
| 9 |
-
"\u03b2": 0.
|
| 10 |
"predict_delta": true,
|
| 11 |
"seasonal_lag": null,
|
| 12 |
"out_mode": "mse",
|
|
|
|
| 6 |
"d_model": 16,
|
| 7 |
"n_heads": 1,
|
| 8 |
"depth": 2,
|
| 9 |
+
"\u03b2": 0.001,
|
| 10 |
"predict_delta": true,
|
| 11 |
"seasonal_lag": null,
|
| 12 |
"out_mode": "mse",
|