jcandane commited on
Commit
df0ba15
·
verified ·
1 Parent(s): c3e89fd

Add TGPT checkpoint

Browse files
Files changed (45) hide show
  1. checkpoint_27280/_CHECKPOINT_METADATA +1 -0
  2. checkpoint_27280/_METADATA +1 -0
  3. checkpoint_27280/_sharding +1 -0
  4. checkpoint_27280/array_metadatas/process_0 +1 -0
  5. checkpoint_27280/d/cff05287b3aede246c488839ecb7cab7 +0 -0
  6. checkpoint_27280/manifest.ocdbt +0 -0
  7. checkpoint_27280/ocdbt.process_0/d/02d7fa9db97eaf0b7ff66055851d601c +0 -0
  8. checkpoint_27280/ocdbt.process_0/d/0e35fb62a3b985f6ef9391df88e78ca8 +0 -0
  9. checkpoint_27280/ocdbt.process_0/d/22029e9dcca48c57051023cad3a032b4 +0 -0
  10. checkpoint_27280/ocdbt.process_0/d/2363ec48f31f5d8f73f699310901cba4 +0 -0
  11. checkpoint_27280/ocdbt.process_0/d/23e983d6a5482151c96f2ee689adcbe6 +0 -0
  12. checkpoint_27280/ocdbt.process_0/d/25880371a71d73bfac27d3483bdd63fe +0 -0
  13. checkpoint_27280/ocdbt.process_0/d/328a3a170f700aee5bb95ad4070f1d18 +0 -0
  14. checkpoint_27280/ocdbt.process_0/d/35261538c16625716438f160bb074611 +0 -0
  15. checkpoint_27280/ocdbt.process_0/d/3797049fbd9502a08b89922e337921ce +0 -0
  16. checkpoint_27280/ocdbt.process_0/d/38990cc5691b94f0455ee1b21b2d53b0 +0 -0
  17. checkpoint_27280/ocdbt.process_0/d/4313bcb0b2bcce7df2b8c3d567395b5e +0 -0
  18. checkpoint_27280/ocdbt.process_0/d/60006b74ff2de74eca23c15027020623 +0 -0
  19. checkpoint_27280/ocdbt.process_0/d/6cdfbe5d8bda7a3982661599fa0895ca +0 -0
  20. checkpoint_27280/ocdbt.process_0/d/7a0d374181eecabcb60d0630b935c4b7 +0 -0
  21. checkpoint_27280/ocdbt.process_0/d/7a40c8a47add879c904f9b94f79cdfd3 +0 -0
  22. checkpoint_27280/ocdbt.process_0/d/83800d9131dcc9d4cf0acf62eadf1f09 +0 -0
  23. checkpoint_27280/ocdbt.process_0/d/848209f640b95719304ab53c2441b073 +0 -0
  24. checkpoint_27280/ocdbt.process_0/d/8ee6c628242266b8fe162b549a25c1b9 +0 -0
  25. checkpoint_27280/ocdbt.process_0/d/92de87146c3b7be1e9875d8665191916 +0 -0
  26. checkpoint_27280/ocdbt.process_0/d/9424775ec5b5d7ed4a336e745a6d2862 +0 -0
  27. checkpoint_27280/ocdbt.process_0/d/9a80f1cc1093313e3899117c6655f418 +0 -0
  28. checkpoint_27280/ocdbt.process_0/d/9f8072ac44c6a5d43fcfdf7de4aff56b +0 -0
  29. checkpoint_27280/ocdbt.process_0/d/a9d9fdae64e3c58de85d253540c33e23 +0 -0
  30. checkpoint_27280/ocdbt.process_0/d/b109f07538a132a6e547d294c4407e3e +0 -0
  31. checkpoint_27280/ocdbt.process_0/d/b9e2de721c914605bc6193bfe3660d5c +0 -0
  32. checkpoint_27280/ocdbt.process_0/d/bbdd10b7a48b4c8e3d693b9f5a0f2eb6 +0 -0
  33. checkpoint_27280/ocdbt.process_0/d/bc252d374759c5c812a471ecbf507cdc +0 -0
  34. checkpoint_27280/ocdbt.process_0/d/d5c00ef944a95fb411b8837d25fbcec3 +0 -0
  35. checkpoint_27280/ocdbt.process_0/d/d6ca1c452f681e173c798c9ac25c423e +0 -0
  36. checkpoint_27280/ocdbt.process_0/d/d7b349bd9b656e81a51ca8d240c03e3a +0 -0
  37. checkpoint_27280/ocdbt.process_0/d/d988b606c8f2ca671e2348f4223df4dd +0 -0
  38. checkpoint_27280/ocdbt.process_0/d/e11ef539ce4f08e10af372c1346b91af +0 -0
  39. checkpoint_27280/ocdbt.process_0/d/e7bfa6397a1bacb0aa6a24ffc9f904fa +0 -0
  40. checkpoint_27280/ocdbt.process_0/d/eee4bd8c40ce87de795235600c319aed +0 -0
  41. checkpoint_27280/ocdbt.process_0/d/f16d7e51a1d3effcf351c669507a2857 +0 -0
  42. checkpoint_27280/ocdbt.process_0/d/f8fce8298920fa6581f76a60f277c75c +0 -0
  43. checkpoint_27280/ocdbt.process_0/d/fec28a2e75a734aae0fabe4b1365f671 +0 -0
  44. checkpoint_27280/ocdbt.process_0/manifest.ocdbt +0 -0
  45. meta.json +1 -1
checkpoint_27280/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758139094026734468, "commit_timestamp_nsecs": 1758139094351559187, "custom_metadata": {}}
checkpoint_27280/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('params', 'in_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'in_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'ln', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'ln', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'out_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('params', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('params', 'time_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'time_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '0')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "1", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '2')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "2", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('rng',)": {"key_metadata": [{"key": "rng", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
checkpoint_27280/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"b3B0X3N0YXRlLjEuMC5jb3VudA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","c3RlcA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cm5n":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
checkpoint_27280/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "rng", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}]}
checkpoint_27280/d/cff05287b3aede246c488839ecb7cab7 ADDED
Binary file (29.1 kB). View file
 
checkpoint_27280/manifest.ocdbt ADDED
Binary file (118 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/02d7fa9db97eaf0b7ff66055851d601c ADDED
Binary file (623 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/0e35fb62a3b985f6ef9391df88e78ca8 ADDED
Binary file (626 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/22029e9dcca48c57051023cad3a032b4 ADDED
Binary file (610 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/2363ec48f31f5d8f73f699310901cba4 ADDED
Binary file (37.9 kB). View file
 
checkpoint_27280/ocdbt.process_0/d/23e983d6a5482151c96f2ee689adcbe6 ADDED
Binary file (1.37 kB). View file
 
checkpoint_27280/ocdbt.process_0/d/25880371a71d73bfac27d3483bdd63fe ADDED
Binary file (448 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/328a3a170f700aee5bb95ad4070f1d18 ADDED
Binary file (657 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/35261538c16625716438f160bb074611 ADDED
Binary file (52 kB). View file
 
checkpoint_27280/ocdbt.process_0/d/3797049fbd9502a08b89922e337921ce ADDED
Binary file (614 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/38990cc5691b94f0455ee1b21b2d53b0 ADDED
Binary file (621 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/4313bcb0b2bcce7df2b8c3d567395b5e ADDED
Binary file (613 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/60006b74ff2de74eca23c15027020623 ADDED
Binary file (671 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/6cdfbe5d8bda7a3982661599fa0895ca ADDED
Binary file (660 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/7a0d374181eecabcb60d0630b935c4b7 ADDED
Binary file (659 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/7a40c8a47add879c904f9b94f79cdfd3 ADDED
Binary file (697 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/83800d9131dcc9d4cf0acf62eadf1f09 ADDED
Binary file (611 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/848209f640b95719304ab53c2441b073 ADDED
Binary file (656 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/8ee6c628242266b8fe162b549a25c1b9 ADDED
Binary file (679 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/92de87146c3b7be1e9875d8665191916 ADDED
Binary file (652 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/9424775ec5b5d7ed4a336e745a6d2862 ADDED
Binary file (614 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/9a80f1cc1093313e3899117c6655f418 ADDED
Binary file (681 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/9f8072ac44c6a5d43fcfdf7de4aff56b ADDED
Binary file (452 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/a9d9fdae64e3c58de85d253540c33e23 ADDED
Binary file (13.7 kB). View file
 
checkpoint_27280/ocdbt.process_0/d/b109f07538a132a6e547d294c4407e3e ADDED
Binary file (614 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/b9e2de721c914605bc6193bfe3660d5c ADDED
Binary file (4.53 kB). View file
 
checkpoint_27280/ocdbt.process_0/d/bbdd10b7a48b4c8e3d693b9f5a0f2eb6 ADDED
Binary file (610 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/bc252d374759c5c812a471ecbf507cdc ADDED
Binary file (596 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/d5c00ef944a95fb411b8837d25fbcec3 ADDED
Binary file (625 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/d6ca1c452f681e173c798c9ac25c423e ADDED
Binary file (650 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/d7b349bd9b656e81a51ca8d240c03e3a ADDED
Binary file (686 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/d988b606c8f2ca671e2348f4223df4dd ADDED
Binary file (628 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/e11ef539ce4f08e10af372c1346b91af ADDED
Binary file (1.29 kB). View file
 
checkpoint_27280/ocdbt.process_0/d/e7bfa6397a1bacb0aa6a24ffc9f904fa ADDED
Binary file (646 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/eee4bd8c40ce87de795235600c319aed ADDED
Binary file (171 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/f16d7e51a1d3effcf351c669507a2857 ADDED
Binary file (637 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/f8fce8298920fa6581f76a60f277c75c ADDED
Binary file (632 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/d/fec28a2e75a734aae0fabe4b1365f671 ADDED
Binary file (449 Bytes). View file
 
checkpoint_27280/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (320 Bytes). View file
 
meta.json CHANGED
@@ -6,7 +6,7 @@
6
  "d_model": 16,
7
  "n_heads": 1,
8
  "depth": 2,
9
- "\u03b2": 0.1,
10
  "predict_delta": true,
11
  "seasonal_lag": null,
12
  "out_mode": "mse",
 
6
  "d_model": 16,
7
  "n_heads": 1,
8
  "depth": 2,
9
+ "\u03b2": 0.01,
10
  "predict_delta": true,
11
  "seasonal_lag": null,
12
  "out_mode": "mse",