jcandane commited on
Commit
54089f0
·
verified ·
1 Parent(s): df0ba15

Add TGPT checkpoint

Browse files
Files changed (37) hide show
  1. checkpoint_28520/_CHECKPOINT_METADATA +1 -0
  2. checkpoint_28520/_METADATA +1 -0
  3. checkpoint_28520/_sharding +1 -0
  4. checkpoint_28520/array_metadatas/process_0 +1 -0
  5. checkpoint_28520/d/e663bcb62cc874dde8c91afb1960109b +0 -0
  6. checkpoint_28520/manifest.ocdbt +0 -0
  7. checkpoint_28520/ocdbt.process_0/d/0fb901236817906b9c5f57df2be4cda9 +0 -0
  8. checkpoint_28520/ocdbt.process_0/d/0fe50bec37229da3fc6c7918516a3952 +0 -0
  9. checkpoint_28520/ocdbt.process_0/d/125c945c077f22e0d44fd6736d2475e5 +0 -0
  10. checkpoint_28520/ocdbt.process_0/d/14461289572b9acb231399e775bbbbe1 +0 -0
  11. checkpoint_28520/ocdbt.process_0/d/199116cbe0f11424e839f951a4d748b6 +0 -0
  12. checkpoint_28520/ocdbt.process_0/d/223ed1d645526977be924cded227eee1 +0 -0
  13. checkpoint_28520/ocdbt.process_0/d/298584da124024ecd60b07c19cd2e792 +0 -0
  14. checkpoint_28520/ocdbt.process_0/d/40b1bd95bde31f8babcf7d1bbb71252d +0 -0
  15. checkpoint_28520/ocdbt.process_0/d/4dc44069c8dfb26b4bf6fc15e5999fb5 +0 -0
  16. checkpoint_28520/ocdbt.process_0/d/6602e4a5e5c36d3171377e2dc862e9e4 +0 -0
  17. checkpoint_28520/ocdbt.process_0/d/6f18b04b27d907690c19cd9faee863bb +0 -0
  18. checkpoint_28520/ocdbt.process_0/d/76b1e1f04e13249b740b197d175c04e5 +0 -0
  19. checkpoint_28520/ocdbt.process_0/d/78434522a296d28fa2c189c5649703f6 +0 -0
  20. checkpoint_28520/ocdbt.process_0/d/803a29c72e2cd1e84f3ec0211d37374c +0 -0
  21. checkpoint_28520/ocdbt.process_0/d/8ab576c48d92bb8cf4f327057a5022ba +0 -0
  22. checkpoint_28520/ocdbt.process_0/d/a4c04e4be1cb0a103e05e79891565138 +0 -0
  23. checkpoint_28520/ocdbt.process_0/d/a6cd8746a13b05572c99f76e1a42e23f +0 -0
  24. checkpoint_28520/ocdbt.process_0/d/aa9264d09947abfdf1824a1af1716843 +0 -0
  25. checkpoint_28520/ocdbt.process_0/d/af1a8ad366018f20d87c9e517d3b520c +0 -0
  26. checkpoint_28520/ocdbt.process_0/d/b17b9ce35994029af33dffd74276a3e0 +0 -0
  27. checkpoint_28520/ocdbt.process_0/d/b1ac9b980e81c5048f1b6da620142389 +0 -0
  28. checkpoint_28520/ocdbt.process_0/d/c5497b268e57971329b25dc0b9b310dc +0 -0
  29. checkpoint_28520/ocdbt.process_0/d/cb76c6303b3c1562974400affe6ba0dc +0 -0
  30. checkpoint_28520/ocdbt.process_0/d/d17d00331968b502b44cee96f5510719 +0 -0
  31. checkpoint_28520/ocdbt.process_0/d/daaabe913b564cdb05888594f34f1cbf +0 -0
  32. checkpoint_28520/ocdbt.process_0/d/dd31ae995347e3c63c7f8c43b9f72706 +0 -0
  33. checkpoint_28520/ocdbt.process_0/d/eb51007932c0d312739358d93480ccff +0 -0
  34. checkpoint_28520/ocdbt.process_0/d/f39fc15e24007e99925b93ea850fde6d +0 -0
  35. checkpoint_28520/ocdbt.process_0/d/fd43b254c0584580607604816ac0c1e5 +0 -0
  36. checkpoint_28520/ocdbt.process_0/manifest.ocdbt +0 -0
  37. meta.json +1 -1
checkpoint_28520/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758139175547789274, "commit_timestamp_nsecs": 1758139175839910757, "custom_metadata": {}}
checkpoint_28520/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('params', 'in_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'in_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'ln', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'ln', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'out_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('params', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('params', 'time_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'time_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '0')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "1", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '2')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "2", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('rng',)": {"key_metadata": [{"key": "rng", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
checkpoint_28520/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"b3B0X3N0YXRlLjEuMC5jb3VudA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","c3RlcA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cm5n":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
checkpoint_28520/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "rng", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}]}
checkpoint_28520/d/e663bcb62cc874dde8c91afb1960109b ADDED
Binary file (24.1 kB). View file
 
checkpoint_28520/manifest.ocdbt ADDED
Binary file (118 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/0fb901236817906b9c5f57df2be4cda9 ADDED
Binary file (650 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/0fe50bec37229da3fc6c7918516a3952 ADDED
Binary file (625 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/125c945c077f22e0d44fd6736d2475e5 ADDED
Binary file (614 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/14461289572b9acb231399e775bbbbe1 ADDED
Binary file (686 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/199116cbe0f11424e839f951a4d748b6 ADDED
Binary file (650 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/223ed1d645526977be924cded227eee1 ADDED
Binary file (685 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/298584da124024ecd60b07c19cd2e792 ADDED
Binary file (652 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/40b1bd95bde31f8babcf7d1bbb71252d ADDED
Binary file (510 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/4dc44069c8dfb26b4bf6fc15e5999fb5 ADDED
Binary file (4.5 kB). View file
 
checkpoint_28520/ocdbt.process_0/d/6602e4a5e5c36d3171377e2dc862e9e4 ADDED
Binary file (27.9 kB). View file
 
checkpoint_28520/ocdbt.process_0/d/6f18b04b27d907690c19cd9faee863bb ADDED
Binary file (626 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/76b1e1f04e13249b740b197d175c04e5 ADDED
Binary file (619 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/78434522a296d28fa2c189c5649703f6 ADDED
Binary file (623 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/803a29c72e2cd1e84f3ec0211d37374c ADDED
Binary file (1.36 kB). View file
 
checkpoint_28520/ocdbt.process_0/d/8ab576c48d92bb8cf4f327057a5022ba ADDED
Binary file (646 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/a4c04e4be1cb0a103e05e79891565138 ADDED
Binary file (650 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/a6cd8746a13b05572c99f76e1a42e23f ADDED
Binary file (452 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/aa9264d09947abfdf1824a1af1716843 ADDED
Binary file (650 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/af1a8ad366018f20d87c9e517d3b520c ADDED
Binary file (662 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/b17b9ce35994029af33dffd74276a3e0 ADDED
Binary file (16.8 kB). View file
 
checkpoint_28520/ocdbt.process_0/d/b1ac9b980e81c5048f1b6da620142389 ADDED
Binary file (614 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/c5497b268e57971329b25dc0b9b310dc ADDED
Binary file (679 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/cb76c6303b3c1562974400affe6ba0dc ADDED
Binary file (171 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/d17d00331968b502b44cee96f5510719 ADDED
Binary file (623 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/daaabe913b564cdb05888594f34f1cbf ADDED
Binary file (645 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/dd31ae995347e3c63c7f8c43b9f72706 ADDED
Binary file (64.7 kB). View file
 
checkpoint_28520/ocdbt.process_0/d/eb51007932c0d312739358d93480ccff ADDED
Binary file (610 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/f39fc15e24007e99925b93ea850fde6d ADDED
Binary file (640 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/d/fd43b254c0584580607604816ac0c1e5 ADDED
Binary file (610 Bytes). View file
 
checkpoint_28520/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (582 Bytes). View file
 
meta.json CHANGED
@@ -6,7 +6,7 @@
6
  "d_model": 16,
7
  "n_heads": 1,
8
  "depth": 2,
9
- "\u03b2": 0.01,
10
  "predict_delta": true,
11
  "seasonal_lag": null,
12
  "out_mode": "mse",
 
6
  "d_model": 16,
7
  "n_heads": 1,
8
  "depth": 2,
9
+ "\u03b2": 0.005,
10
  "predict_delta": true,
11
  "seasonal_lag": null,
12
  "out_mode": "mse",