Add TGPT checkpoint
Browse files- checkpoint_28520/_CHECKPOINT_METADATA +1 -0
- checkpoint_28520/_METADATA +1 -0
- checkpoint_28520/_sharding +1 -0
- checkpoint_28520/array_metadatas/process_0 +1 -0
- checkpoint_28520/d/e663bcb62cc874dde8c91afb1960109b +0 -0
- checkpoint_28520/manifest.ocdbt +0 -0
- checkpoint_28520/ocdbt.process_0/d/0fb901236817906b9c5f57df2be4cda9 +0 -0
- checkpoint_28520/ocdbt.process_0/d/0fe50bec37229da3fc6c7918516a3952 +0 -0
- checkpoint_28520/ocdbt.process_0/d/125c945c077f22e0d44fd6736d2475e5 +0 -0
- checkpoint_28520/ocdbt.process_0/d/14461289572b9acb231399e775bbbbe1 +0 -0
- checkpoint_28520/ocdbt.process_0/d/199116cbe0f11424e839f951a4d748b6 +0 -0
- checkpoint_28520/ocdbt.process_0/d/223ed1d645526977be924cded227eee1 +0 -0
- checkpoint_28520/ocdbt.process_0/d/298584da124024ecd60b07c19cd2e792 +0 -0
- checkpoint_28520/ocdbt.process_0/d/40b1bd95bde31f8babcf7d1bbb71252d +0 -0
- checkpoint_28520/ocdbt.process_0/d/4dc44069c8dfb26b4bf6fc15e5999fb5 +0 -0
- checkpoint_28520/ocdbt.process_0/d/6602e4a5e5c36d3171377e2dc862e9e4 +0 -0
- checkpoint_28520/ocdbt.process_0/d/6f18b04b27d907690c19cd9faee863bb +0 -0
- checkpoint_28520/ocdbt.process_0/d/76b1e1f04e13249b740b197d175c04e5 +0 -0
- checkpoint_28520/ocdbt.process_0/d/78434522a296d28fa2c189c5649703f6 +0 -0
- checkpoint_28520/ocdbt.process_0/d/803a29c72e2cd1e84f3ec0211d37374c +0 -0
- checkpoint_28520/ocdbt.process_0/d/8ab576c48d92bb8cf4f327057a5022ba +0 -0
- checkpoint_28520/ocdbt.process_0/d/a4c04e4be1cb0a103e05e79891565138 +0 -0
- checkpoint_28520/ocdbt.process_0/d/a6cd8746a13b05572c99f76e1a42e23f +0 -0
- checkpoint_28520/ocdbt.process_0/d/aa9264d09947abfdf1824a1af1716843 +0 -0
- checkpoint_28520/ocdbt.process_0/d/af1a8ad366018f20d87c9e517d3b520c +0 -0
- checkpoint_28520/ocdbt.process_0/d/b17b9ce35994029af33dffd74276a3e0 +0 -0
- checkpoint_28520/ocdbt.process_0/d/b1ac9b980e81c5048f1b6da620142389 +0 -0
- checkpoint_28520/ocdbt.process_0/d/c5497b268e57971329b25dc0b9b310dc +0 -0
- checkpoint_28520/ocdbt.process_0/d/cb76c6303b3c1562974400affe6ba0dc +0 -0
- checkpoint_28520/ocdbt.process_0/d/d17d00331968b502b44cee96f5510719 +0 -0
- checkpoint_28520/ocdbt.process_0/d/daaabe913b564cdb05888594f34f1cbf +0 -0
- checkpoint_28520/ocdbt.process_0/d/dd31ae995347e3c63c7f8c43b9f72706 +0 -0
- checkpoint_28520/ocdbt.process_0/d/eb51007932c0d312739358d93480ccff +0 -0
- checkpoint_28520/ocdbt.process_0/d/f39fc15e24007e99925b93ea850fde6d +0 -0
- checkpoint_28520/ocdbt.process_0/d/fd43b254c0584580607604816ac0c1e5 +0 -0
- checkpoint_28520/ocdbt.process_0/manifest.ocdbt +0 -0
- meta.json +1 -1
checkpoint_28520/_CHECKPOINT_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758139175547789274, "commit_timestamp_nsecs": 1758139175839910757, "custom_metadata": {}}
|
checkpoint_28520/_METADATA
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"tree_metadata": {"('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('params', 'in_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'in_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'ln', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'ln', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'out_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('params', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('params', 'time_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'time_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '0')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "1", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '2')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "2", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('rng',)": {"key_metadata": [{"key": "rng", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
|
checkpoint_28520/_sharding
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"b3B0X3N0YXRlLjEuMC5jb3VudA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","c3RlcA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cm5n":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
|
checkpoint_28520/array_metadatas/process_0
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "rng", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}]}
|
checkpoint_28520/d/e663bcb62cc874dde8c91afb1960109b
ADDED
|
Binary file (24.1 kB). View file
|
|
|
checkpoint_28520/manifest.ocdbt
ADDED
|
Binary file (118 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/0fb901236817906b9c5f57df2be4cda9
ADDED
|
Binary file (650 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/0fe50bec37229da3fc6c7918516a3952
ADDED
|
Binary file (625 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/125c945c077f22e0d44fd6736d2475e5
ADDED
|
Binary file (614 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/14461289572b9acb231399e775bbbbe1
ADDED
|
Binary file (686 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/199116cbe0f11424e839f951a4d748b6
ADDED
|
Binary file (650 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/223ed1d645526977be924cded227eee1
ADDED
|
Binary file (685 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/298584da124024ecd60b07c19cd2e792
ADDED
|
Binary file (652 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/40b1bd95bde31f8babcf7d1bbb71252d
ADDED
|
Binary file (510 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/4dc44069c8dfb26b4bf6fc15e5999fb5
ADDED
|
Binary file (4.5 kB). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/6602e4a5e5c36d3171377e2dc862e9e4
ADDED
|
Binary file (27.9 kB). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/6f18b04b27d907690c19cd9faee863bb
ADDED
|
Binary file (626 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/76b1e1f04e13249b740b197d175c04e5
ADDED
|
Binary file (619 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/78434522a296d28fa2c189c5649703f6
ADDED
|
Binary file (623 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/803a29c72e2cd1e84f3ec0211d37374c
ADDED
|
Binary file (1.36 kB). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/8ab576c48d92bb8cf4f327057a5022ba
ADDED
|
Binary file (646 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/a4c04e4be1cb0a103e05e79891565138
ADDED
|
Binary file (650 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/a6cd8746a13b05572c99f76e1a42e23f
ADDED
|
Binary file (452 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/aa9264d09947abfdf1824a1af1716843
ADDED
|
Binary file (650 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/af1a8ad366018f20d87c9e517d3b520c
ADDED
|
Binary file (662 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/b17b9ce35994029af33dffd74276a3e0
ADDED
|
Binary file (16.8 kB). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/b1ac9b980e81c5048f1b6da620142389
ADDED
|
Binary file (614 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/c5497b268e57971329b25dc0b9b310dc
ADDED
|
Binary file (679 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/cb76c6303b3c1562974400affe6ba0dc
ADDED
|
Binary file (171 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/d17d00331968b502b44cee96f5510719
ADDED
|
Binary file (623 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/daaabe913b564cdb05888594f34f1cbf
ADDED
|
Binary file (645 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/dd31ae995347e3c63c7f8c43b9f72706
ADDED
|
Binary file (64.7 kB). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/eb51007932c0d312739358d93480ccff
ADDED
|
Binary file (610 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/f39fc15e24007e99925b93ea850fde6d
ADDED
|
Binary file (640 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/d/fd43b254c0584580607604816ac0c1e5
ADDED
|
Binary file (610 Bytes). View file
|
|
|
checkpoint_28520/ocdbt.process_0/manifest.ocdbt
ADDED
|
Binary file (582 Bytes). View file
|
|
|
meta.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"d_model": 16,
|
| 7 |
"n_heads": 1,
|
| 8 |
"depth": 2,
|
| 9 |
-
"\u03b2": 0.
|
| 10 |
"predict_delta": true,
|
| 11 |
"seasonal_lag": null,
|
| 12 |
"out_mode": "mse",
|
|
|
|
| 6 |
"d_model": 16,
|
| 7 |
"n_heads": 1,
|
| 8 |
"depth": 2,
|
| 9 |
+
"\u03b2": 0.005,
|
| 10 |
"predict_delta": true,
|
| 11 |
"seasonal_lag": null,
|
| 12 |
"out_mode": "mse",
|