jcandane commited on
Commit
bbd02e6
·
verified ·
1 Parent(s): 54089f0

Add TGPT checkpoint

Browse files
Files changed (46) hide show
  1. checkpoint_29760/_CHECKPOINT_METADATA +1 -0
  2. checkpoint_29760/_METADATA +1 -0
  3. checkpoint_29760/_sharding +1 -0
  4. checkpoint_29760/array_metadatas/process_0 +1 -0
  5. checkpoint_29760/d/e3bc2af06cdf2f0397e6ef376a4170c1 +0 -0
  6. checkpoint_29760/manifest.ocdbt +0 -0
  7. checkpoint_29760/ocdbt.process_0/d/007ebc530249f4a7e697a549823ba7aa +0 -0
  8. checkpoint_29760/ocdbt.process_0/d/0435fabb3556a406e1fe330017c829d9 +0 -0
  9. checkpoint_29760/ocdbt.process_0/d/1222a19a42ab47b108835795b9a0c803 +0 -0
  10. checkpoint_29760/ocdbt.process_0/d/204736c1c49d2f32c811129bcb179905 +0 -0
  11. checkpoint_29760/ocdbt.process_0/d/2341f560acb9747fc83784ef70b63fa7 +0 -0
  12. checkpoint_29760/ocdbt.process_0/d/28404e7dcc9548c1a308ae65e54938a7 +0 -0
  13. checkpoint_29760/ocdbt.process_0/d/2aa58ee48f550d5e272a56492ba20a90 +0 -0
  14. checkpoint_29760/ocdbt.process_0/d/3ae445e867d7406c9482798cdf2a4d50 +0 -0
  15. checkpoint_29760/ocdbt.process_0/d/3d99b0183fcecf1766782be926c1cf83 +0 -0
  16. checkpoint_29760/ocdbt.process_0/d/4615740e84880ff890077b63c93b4ccf +0 -0
  17. checkpoint_29760/ocdbt.process_0/d/4d7a33651229d78c32da9d630117e04b +0 -0
  18. checkpoint_29760/ocdbt.process_0/d/573bd826d21e3cb76dfa64475510b1b3 +0 -0
  19. checkpoint_29760/ocdbt.process_0/d/5c51741152faf45376bbf8a6e991f966 +0 -0
  20. checkpoint_29760/ocdbt.process_0/d/5f3d29b80f2fb08df9940e3b3723e173 +0 -0
  21. checkpoint_29760/ocdbt.process_0/d/63a1543f5225b32c8974a5e978919fe4 +0 -0
  22. checkpoint_29760/ocdbt.process_0/d/640bbe82b19b0d599873f9f4bab6543f +0 -0
  23. checkpoint_29760/ocdbt.process_0/d/646140bd23749a65703e13bb61371037 +0 -0
  24. checkpoint_29760/ocdbt.process_0/d/6951ca1cf66a0cb66451e1d17af198ba +0 -0
  25. checkpoint_29760/ocdbt.process_0/d/69fb40086f78aeef78a3086c1b48fc65 +0 -0
  26. checkpoint_29760/ocdbt.process_0/d/6b71a602d9e8c6cdade99432a9812e70 +0 -0
  27. checkpoint_29760/ocdbt.process_0/d/6d68861d3b2200aa51e2e536c3860397 +0 -0
  28. checkpoint_29760/ocdbt.process_0/d/7c0bf0782c588385dc35a84e840f3495 +0 -0
  29. checkpoint_29760/ocdbt.process_0/d/81b6302e883bbfa676d547ad9ca5772c +0 -0
  30. checkpoint_29760/ocdbt.process_0/d/91dd4f162a27fcdc5399b47a45f1218b +0 -0
  31. checkpoint_29760/ocdbt.process_0/d/988323db7b4ce92c09d78c4d7cb7ef0e +0 -0
  32. checkpoint_29760/ocdbt.process_0/d/9aa96aaf46e7dca9aca5b851a700a530 +0 -0
  33. checkpoint_29760/ocdbt.process_0/d/a0739aad34a1dfcbe8bbb353f8f3c1a3 +0 -0
  34. checkpoint_29760/ocdbt.process_0/d/ab9502043fa886ae9be7cf67b6c60f3d +0 -0
  35. checkpoint_29760/ocdbt.process_0/d/ac4b1dcaada37bf3f2d86b884ab65960 +0 -0
  36. checkpoint_29760/ocdbt.process_0/d/b432c8656fbca78b2c8198cb1e061992 +0 -0
  37. checkpoint_29760/ocdbt.process_0/d/b4a833b5ba8c4f248ccafe7f36102102 +0 -0
  38. checkpoint_29760/ocdbt.process_0/d/ba7c61d442230c2e7ecfc26a56dd82bb +0 -0
  39. checkpoint_29760/ocdbt.process_0/d/bc1d22ff07b62538de250b068e3f4f21 +0 -0
  40. checkpoint_29760/ocdbt.process_0/d/be4f8fbda5a1209e26e16ef7cb526f28 +0 -0
  41. checkpoint_29760/ocdbt.process_0/d/c3c090c9be044d9708b61feb3de61af2 +0 -0
  42. checkpoint_29760/ocdbt.process_0/d/cfcbf9756b7f7cb2907d4282c74f4dc3 +0 -0
  43. checkpoint_29760/ocdbt.process_0/d/d6ed69df3c31be7b72db2d9770b570a3 +0 -0
  44. checkpoint_29760/ocdbt.process_0/d/fbef88da640e44233eef20f7c62a1421 +0 -0
  45. checkpoint_29760/ocdbt.process_0/manifest.ocdbt +0 -0
  46. meta.json +1 -1
checkpoint_29760/_CHECKPOINT_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"item_handlers": "orbax.checkpoint._src.handlers.pytree_checkpoint_handler.PyTreeCheckpointHandler", "metrics": {}, "performance_metrics": {}, "init_timestamp_nsecs": 1758139235902924561, "commit_timestamp_nsecs": 1758139236181792555, "custom_metadata": {}}
checkpoint_29760/_METADATA ADDED
@@ -0,0 +1 @@
 
 
1
+ {"tree_metadata": {"('step',)": {"key_metadata": [{"key": "step", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('params', 'in_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'in_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('params', 'ln', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'ln', 'scale')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'out_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('params', 'out_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('params', 'time_proj', 'bias')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('params', 'time_proj', 'kernel')": {"key_metadata": [{"key": "params", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '0')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "0", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '0', 'count')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "count", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": []}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'mu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'mu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "mu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'in_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "in_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_0', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_0", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_0', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_0", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 64]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'Dense_1', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "Dense_1", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_0', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_0", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'LayerNorm\u03b2_1', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "LayerNorm\u03b2_1", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'k_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "k_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'q_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "q_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'isocoder', 'AttentionBlock_1', 'MHA_0', 'v_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "isocoder", "key_type": 2}, {"key": "AttentionBlock_1", "key_type": 2}, {"key": "MHA_0", "key_type": 2}, {"key": "v_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [1, 16, 16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'ln', 'scale')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "ln", "key_type": 2}, {"key": "scale", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}, "('opt_state', '1', '0', 'nu', 'out_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "out_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16, 2]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'bias')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "bias", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [16]}}, "('opt_state', '1', '0', 'nu', 'time_proj', 'kernel')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "0", "key_type": 1}, {"key": "nu", "key_type": 2}, {"key": "time_proj", "key_type": 2}, {"key": "kernel", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [64, 16]}}, "('opt_state', '1', '1')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "1", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('opt_state', '1', '2')": {"key_metadata": [{"key": "opt_state", "key_type": 2}, {"key": "1", "key_type": 1}, {"key": "2", "key_type": 1}], "value_metadata": {"value_type": "None", "skip_deserialize": true}}, "('rng',)": {"key_metadata": [{"key": "rng", "key_type": 2}], "value_metadata": {"value_type": "jax.Array", "skip_deserialize": false, "write_shape": [2]}}}, "use_zarr3": false, "store_array_data_equal_to_fill_value": true, "custom_metadata": null}
checkpoint_29760/_sharding ADDED
@@ -0,0 +1 @@
 
 
1
+ {"b3B0X3N0YXRlLjEuMC5jb3VudA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5tdS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS50aW1lX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pbl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18wLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLmtfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnFfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLk1IQV8wLnZfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzAua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkRlbnNlXzEua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzAuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5pc29jb2Rlci5BdHRlbnRpb25CbG9ja18xLkxheWVyTm9ybc6yXzEuc2NhbGU=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5sbi5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","b3B0X3N0YXRlLjEuMC5udS5vdXRfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","c3RlcA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLm91dF9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2ouYmlhcw==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmluX3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzAuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMC5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuRGVuc2VfMS5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMC5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTGF5ZXJOb3JtzrJfMS5zY2FsZQ==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAua19wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAub3V0X3Byb2oua2VybmVs":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAucV9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmlzb2NvZGVyLkF0dGVudGlvbkJsb2NrXzEuTUhBXzAudl9wcm9qLmtlcm5lbA==":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLmJpYXM=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLmxuLnNjYWxl":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5iaWFz":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cGFyYW1zLnRpbWVfcHJvai5rZXJuZWw=":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}","cm5n":"{\"sharding_type\": \"SingleDeviceSharding\", \"device_str\": \"cuda:0\"}"}
checkpoint_29760/array_metadatas/process_0 ADDED
@@ -0,0 +1 @@
 
 
1
+ {"array_metadatas": [{"array_metadata": {"param_name": "step", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "params.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.count", "write_shape": [], "chunk_shape": [], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.mu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.in_proj.kernel", "write_shape": [2, 16], "chunk_shape": [2, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_0.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.bias", "write_shape": [64], "chunk_shape": [64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_0.kernel", "write_shape": [16, 64], "chunk_shape": [16, 64], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.Dense_1.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_0.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.LayerNorm\u03b2_1.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.k_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.out_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.q_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.isocoder.AttentionBlock_1.MHA_0.v_proj.kernel", "write_shape": [1, 16, 16], "chunk_shape": [1, 16, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.ln.scale", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.bias", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.out_proj.kernel", "write_shape": [16, 2], "chunk_shape": [16, 2], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.bias", "write_shape": [16], "chunk_shape": [16], "ext_metadata": null}}, {"array_metadata": {"param_name": "opt_state.1.0.nu.time_proj.kernel", "write_shape": [64, 16], "chunk_shape": [64, 16], "ext_metadata": null}}, {"array_metadata": {"param_name": "rng", "write_shape": [2], "chunk_shape": [2], "ext_metadata": null}}]}
checkpoint_29760/d/e3bc2af06cdf2f0397e6ef376a4170c1 ADDED
Binary file (25 kB). View file
 
checkpoint_29760/manifest.ocdbt ADDED
Binary file (118 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/007ebc530249f4a7e697a549823ba7aa ADDED
Binary file (610 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/0435fabb3556a406e1fe330017c829d9 ADDED
Binary file (674 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/1222a19a42ab47b108835795b9a0c803 ADDED
Binary file (614 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/204736c1c49d2f32c811129bcb179905 ADDED
Binary file (623 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/2341f560acb9747fc83784ef70b63fa7 ADDED
Binary file (652 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/28404e7dcc9548c1a308ae65e54938a7 ADDED
Binary file (657 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/2aa58ee48f550d5e272a56492ba20a90 ADDED
Binary file (611 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/3ae445e867d7406c9482798cdf2a4d50 ADDED
Binary file (486 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/3d99b0183fcecf1766782be926c1cf83 ADDED
Binary file (640 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/4615740e84880ff890077b63c93b4ccf ADDED
Binary file (1.35 kB). View file
 
checkpoint_29760/ocdbt.process_0/d/4d7a33651229d78c32da9d630117e04b ADDED
Binary file (632 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/573bd826d21e3cb76dfa64475510b1b3 ADDED
Binary file (660 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/5c51741152faf45376bbf8a6e991f966 ADDED
Binary file (659 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/5f3d29b80f2fb08df9940e3b3723e173 ADDED
Binary file (680 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/63a1543f5225b32c8974a5e978919fe4 ADDED
Binary file (51 kB). View file
 
checkpoint_29760/ocdbt.process_0/d/640bbe82b19b0d599873f9f4bab6543f ADDED
Binary file (171 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/646140bd23749a65703e13bb61371037 ADDED
Binary file (610 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/6951ca1cf66a0cb66451e1d17af198ba ADDED
Binary file (681 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/69fb40086f78aeef78a3086c1b48fc65 ADDED
Binary file (656 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/6b71a602d9e8c6cdade99432a9812e70 ADDED
Binary file (607 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/6d68861d3b2200aa51e2e536c3860397 ADDED
Binary file (697 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/7c0bf0782c588385dc35a84e840f3495 ADDED
Binary file (451 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/81b6302e883bbfa676d547ad9ca5772c ADDED
Binary file (676 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/91dd4f162a27fcdc5399b47a45f1218b ADDED
Binary file (13.3 kB). View file
 
checkpoint_29760/ocdbt.process_0/d/988323db7b4ce92c09d78c4d7cb7ef0e ADDED
Binary file (671 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/9aa96aaf46e7dca9aca5b851a700a530 ADDED
Binary file (650 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/a0739aad34a1dfcbe8bbb353f8f3c1a3 ADDED
Binary file (682 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/ab9502043fa886ae9be7cf67b6c60f3d ADDED
Binary file (658 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/ac4b1dcaada37bf3f2d86b884ab65960 ADDED
Binary file (671 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/b432c8656fbca78b2c8198cb1e061992 ADDED
Binary file (657 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/b4a833b5ba8c4f248ccafe7f36102102 ADDED
Binary file (599 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/ba7c61d442230c2e7ecfc26a56dd82bb ADDED
Binary file (623 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/bc1d22ff07b62538de250b068e3f4f21 ADDED
Binary file (618 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/be4f8fbda5a1209e26e16ef7cb526f28 ADDED
Binary file (659 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/c3c090c9be044d9708b61feb3de61af2 ADDED
Binary file (629 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/cfcbf9756b7f7cb2907d4282c74f4dc3 ADDED
Binary file (38.1 kB). View file
 
checkpoint_29760/ocdbt.process_0/d/d6ed69df3c31be7b72db2d9770b570a3 ADDED
Binary file (611 Bytes). View file
 
checkpoint_29760/ocdbt.process_0/d/fbef88da640e44233eef20f7c62a1421 ADDED
Binary file (1.33 kB). View file
 
checkpoint_29760/ocdbt.process_0/manifest.ocdbt ADDED
Binary file (352 Bytes). View file
 
meta.json CHANGED
@@ -6,7 +6,7 @@
6
  "d_model": 16,
7
  "n_heads": 1,
8
  "depth": 2,
9
- "\u03b2": 0.005,
10
  "predict_delta": true,
11
  "seasonal_lag": null,
12
  "out_mode": "mse",
 
6
  "d_model": 16,
7
  "n_heads": 1,
8
  "depth": 2,
9
+ "\u03b2": 0.001,
10
  "predict_delta": true,
11
  "seasonal_lag": null,
12
  "out_mode": "mse",