update

Browse files

Files changed (4) hide show

README.md +47 -0
config.json +27 -0
tf_model-00001-of-00013.h5 +3 -0
tf_model.h5.index.json +777 -0

README.md ADDED Viewed

	@@ -0,0 +1,47 @@

+---
+tags:
+- generated_from_keras_callback
+model-index:
+- name: opt-30b-sharded
+  results: []
+---
+<!-- This model card has been generated automatically according to the information Keras had access to. You should
+probably proofread and complete it, then remove this comment. -->
+# opt-30b-sharded
+This model was trained from scratch on an unknown dataset.
+No evaluation results have been recorded for this model.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- optimizer: None
+- training_precision: float32
+### Training results
+More information needed
+### Framework versions
+- Transformers 4.20.0.dev0
+- TensorFlow 2.9.1
+- Datasets 2.2.2
+- Tokenizers 0.12.1

config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "_name_or_path": "opt-30b",
+  "activation_dropout": 0.0,
+  "activation_function": "relu",
+  "architectures": [
+    "OPTForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "do_layer_norm_before": true,
+  "dropout": 0.1,
+  "eos_token_id": 2,
+  "ffn_dim": 28672,
+  "hidden_size": 7168,
+  "init_std": 0.02,
+  "layerdrop": 0.0,
+  "max_position_embeddings": 2048,
+  "model_type": "opt",
+  "num_attention_heads": 56,
+  "num_hidden_layers": 48,
+  "pad_token_id": 1,
+  "torch_dtype": "float16",
+  "transformers_version": "4.20.0.dev0",
+  "use_cache": true,
+  "vocab_size": 50272,
+  "word_embed_proj_dim": 7168
+}

tf_model-00001-of-00013.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39dbe33709fb7a4995e27a6ee4d603742cd05320c3d4f1e9899ae49ca44187a0
+size 9722369432

tf_model.h5.index.json ADDED Viewed

	@@ -0,0 +1,777 @@

+{
+  "metadata": {
+    "total_size": 119898103808
+  },
+  "weight_map": {
+    "decoder.embed_tokens/model.decoder.embed_tokens/weight:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/embed_positions/weight:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc1/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc1/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc2/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/fc2/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/final_layer_norm/beta:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/final_layer_norm/gamma:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/k_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/k_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/out_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/out_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/q_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/q_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/v_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/v_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn_layer_norm/beta:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.0/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc1/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc1/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc2/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/fc2/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/final_layer_norm/beta:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/final_layer_norm/gamma:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/k_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/k_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/out_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/out_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/q_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/q_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/v_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/v_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn_layer_norm/beta:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.1/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc1/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc1/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc2/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/fc2/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/final_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/final_layer_norm/gamma:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/k_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/k_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/out_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/out_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/q_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/q_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/v_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/v_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.10/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc1/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc1/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc2/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/fc2/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/final_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/final_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/k_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/k_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/out_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/out_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/q_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/q_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/v_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/v_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.11/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc1/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc1/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc2/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/fc2/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/final_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/final_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/k_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/k_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/out_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/out_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/q_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/q_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/v_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/v_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.12/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc1/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc1/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc2/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/fc2/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/final_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/final_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/k_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/k_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/out_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/out_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/q_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/q_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/v_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/v_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.13/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc1/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc1/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc2/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/fc2/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/final_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/final_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/k_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/k_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/out_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/out_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/q_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/q_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/v_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/v_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.14/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc1/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc1/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc2/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/fc2/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/final_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/final_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/k_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/k_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/out_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/out_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/q_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/q_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/v_proj/bias:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/v_proj/kernel:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn_layer_norm/beta:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.15/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc1/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc1/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc2/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/fc2/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/final_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/final_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/k_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/k_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/out_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/out_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/q_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/q_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/v_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/v_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.16/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc1/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc1/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc2/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/fc2/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/final_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/final_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/k_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/k_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/out_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/out_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/q_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/q_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/v_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/v_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.17/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc1/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc1/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc2/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/fc2/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/final_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/final_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/k_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/k_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/out_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/out_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/q_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/q_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/v_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/v_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.18/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc1/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc1/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc2/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/fc2/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/final_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/final_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/k_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/k_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/out_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/out_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/q_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/q_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/v_proj/bias:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/v_proj/kernel:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn_layer_norm/beta:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.19/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc1/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc1/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc2/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/fc2/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/final_layer_norm/beta:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/final_layer_norm/gamma:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/k_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/k_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/out_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/out_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/q_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/q_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/v_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/v_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn_layer_norm/beta:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.2/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc1/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc1/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc2/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/fc2/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/final_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/final_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/k_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/k_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/out_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/out_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/q_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/q_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/v_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/v_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.20/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc1/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc1/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc2/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/fc2/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/final_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/final_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/k_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/k_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/out_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/out_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/q_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/q_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/v_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/v_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.21/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc1/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc1/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc2/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/fc2/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/final_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/final_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/k_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/k_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/out_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/out_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/q_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/q_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/v_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/v_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.22/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc1/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc1/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc2/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/fc2/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/final_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/final_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/k_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/k_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/out_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/out_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/q_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/q_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/v_proj/bias:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/v_proj/kernel:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn_layer_norm/beta:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.23/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc1/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc1/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc2/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/fc2/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/final_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/final_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/k_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/k_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/out_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/out_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/q_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/q_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/v_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/v_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.24/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc1/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc1/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc2/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/fc2/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/final_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/final_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/k_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/k_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/out_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/out_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/q_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/q_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/v_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/v_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.25/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc1/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc1/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc2/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/fc2/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/final_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/final_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/k_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/k_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/out_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/out_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/q_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/q_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/v_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/v_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.26/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc1/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc1/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc2/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/fc2/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/final_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/final_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/k_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/k_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/out_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/out_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/q_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/q_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/v_proj/bias:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/v_proj/kernel:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn_layer_norm/beta:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.27/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc1/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc1/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc2/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/fc2/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/final_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/final_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/k_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/k_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/out_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/out_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/q_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/q_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/v_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/v_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.28/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc1/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc1/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc2/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/fc2/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/final_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/final_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/k_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/k_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/out_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/out_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/q_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/q_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/v_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/v_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.29/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc1/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc1/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc2/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/fc2/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/final_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/final_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/k_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/k_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/out_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/out_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/q_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/q_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/v_proj/bias:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/v_proj/kernel:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn_layer_norm/beta:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.3/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc1/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc1/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc2/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/fc2/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/final_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/final_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/k_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/k_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/out_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/out_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/q_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/q_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/v_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/v_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.30/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc1/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc1/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc2/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/fc2/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/final_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/final_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/k_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/k_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/out_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/out_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/q_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/q_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/v_proj/bias:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/v_proj/kernel:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn_layer_norm/beta:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.31/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc1/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc1/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc2/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/fc2/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/final_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/final_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/k_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/k_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/out_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/out_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/q_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/q_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/v_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/v_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.32/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc1/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc1/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc2/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/fc2/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/final_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/final_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/k_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/k_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/out_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/out_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/q_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/q_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/v_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/v_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.33/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc1/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc1/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc2/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/fc2/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/final_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/final_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/k_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/k_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/out_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/out_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/q_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/q_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/v_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/v_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.34/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc1/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc1/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc2/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/fc2/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/final_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/final_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/k_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/k_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/out_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/out_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/q_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/q_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/v_proj/bias:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/v_proj/kernel:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn_layer_norm/beta:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.35/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc1/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc1/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc2/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/fc2/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/final_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/final_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/k_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/k_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/out_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/out_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/q_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/q_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/v_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/v_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.36/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc1/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc1/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc2/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/fc2/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/final_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/final_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/k_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/k_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/out_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/out_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/q_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/q_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/v_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/v_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.37/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc1/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc1/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc2/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/fc2/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/final_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/final_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/k_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/k_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/out_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/out_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/q_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/q_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/v_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/v_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.38/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc1/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc1/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc2/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/fc2/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/final_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/final_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/k_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/k_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/out_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/out_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/q_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/q_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/v_proj/bias:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/v_proj/kernel:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn_layer_norm/beta:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.39/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc1/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc1/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc2/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/fc2/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/final_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/final_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/k_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/k_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/out_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/out_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/q_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/q_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/v_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/v_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.4/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/fc1/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/fc1/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/fc2/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/fc2/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/final_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/final_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/k_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/k_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/out_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/out_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/q_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/q_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/v_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/v_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.40/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/fc1/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/fc1/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/fc2/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/fc2/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/final_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/final_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/k_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/k_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/out_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/out_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/q_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/q_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/v_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/v_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.41/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/fc1/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/fc1/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/fc2/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/fc2/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/final_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/final_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/k_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/k_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/out_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/out_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/q_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/q_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/v_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/v_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.42/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/fc1/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/fc1/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/fc2/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/fc2/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/final_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/final_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/k_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/k_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/out_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/out_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/q_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/q_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/v_proj/bias:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/v_proj/kernel:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn_layer_norm/beta:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.43/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/fc1/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/fc1/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/fc2/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/fc2/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/final_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/final_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/k_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/k_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/out_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/out_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/q_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/q_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/v_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/v_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.44/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/fc1/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/fc1/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/fc2/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/fc2/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/final_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/final_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/k_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/k_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/out_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/out_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/q_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/q_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/v_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/v_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.45/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/fc1/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/fc1/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/fc2/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/fc2/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/final_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/final_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/k_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/k_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/out_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/out_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/q_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/q_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/v_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/v_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.46/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/fc1/bias:0": "tf_model-00013-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/fc1/kernel:0": "tf_model-00013-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/fc2/bias:0": "tf_model-00013-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/fc2/kernel:0": "tf_model-00013-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/final_layer_norm/beta:0": "tf_model-00013-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/final_layer_norm/gamma:0": "tf_model-00013-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/k_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/k_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/out_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/out_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/q_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/q_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/v_proj/bias:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/v_proj/kernel:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn_layer_norm/beta:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.47/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc1/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc1/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc2/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/fc2/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/final_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/final_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/k_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/k_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/out_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/out_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/q_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/q_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/v_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/v_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.5/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc1/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc1/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc2/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/fc2/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/final_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/final_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/k_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/k_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/out_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/out_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/q_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/q_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/v_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/v_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.6/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc1/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc1/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc2/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/fc2/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/final_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/final_layer_norm/gamma:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/k_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/k_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/out_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/out_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/q_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/q_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/v_proj/bias:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/v_proj/kernel:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn_layer_norm/beta:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.7/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc1/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc1/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc2/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/fc2/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/final_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/final_layer_norm/gamma:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/k_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/k_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/out_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/out_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/q_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/q_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/v_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/v_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.8/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc1/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc1/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc2/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/fc2/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/final_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/final_layer_norm/gamma:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/k_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/k_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/out_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/out_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/q_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/q_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/v_proj/bias:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/v_proj/kernel:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn_layer_norm/beta:0": "tf_model-00003-of-00013.h5",
+    "tfopt_for_causal_lm/model/decoder/layers.9/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00013.h5"
+  }
+}