{ "metadata": { "total_size": 59949080576 }, "weight_map": { "tfopt_for_causal_lm_1/model/decoder/embed_positions/weight:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/embed_tokens/weight:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/fc1/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/fc1/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/fc2/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/fc2/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.0/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/fc1/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/fc1/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/fc2/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/fc2/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.1/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.10/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.11/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.12/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.13/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.14/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.15/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.16/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.17/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.18/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.19/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/fc1/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/fc1/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/fc2/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/fc2/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.2/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.20/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.21/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/fc1/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/fc1/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/fc2/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/fc2/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/final_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/final_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.22/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/k_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/k_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/out_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/out_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/q_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/q_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/v_proj/bias:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn/v_proj/kernel:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn_layer_norm/beta:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.23/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.24/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.25/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.26/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.27/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.28/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.29/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/fc1/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/fc1/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/fc2/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/fc2/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.3/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/fc1/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/fc1/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/fc2/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/fc2/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/final_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/final_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.30/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/k_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/k_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/out_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/out_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/q_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/q_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/v_proj/bias:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn/v_proj/kernel:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn_layer_norm/beta:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.31/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.32/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.33/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.34/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.35/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.36/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.37/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/fc1/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/fc1/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/fc2/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/fc2/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/final_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/final_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.38/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/k_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/k_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/out_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/out_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/q_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/q_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/v_proj/bias:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn/v_proj/kernel:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn_layer_norm/beta:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.39/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/fc1/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/fc1/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/fc2/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/fc2/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.4/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.40/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.41/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.42/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.43/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.44/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.45/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/fc1/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/fc1/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/fc2/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/fc2/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/final_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/final_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.46/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/fc1/bias:0": "tf_model-00007-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/fc1/kernel:0": "tf_model-00007-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/fc2/bias:0": "tf_model-00007-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/fc2/kernel:0": "tf_model-00007-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/final_layer_norm/beta:0": "tf_model-00007-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/final_layer_norm/gamma:0": "tf_model-00007-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/k_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/k_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/out_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/out_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/q_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/q_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/v_proj/bias:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn/v_proj/kernel:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn_layer_norm/beta:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.47/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/fc1/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/fc1/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/fc2/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/fc2/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.5/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/fc1/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/fc1/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/fc2/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/fc2/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/final_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/final_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.6/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/k_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/k_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/out_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/out_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/q_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/q_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/v_proj/bias:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn/v_proj/kernel:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn_layer_norm/beta:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.7/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.8/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/fc1/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/fc1/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/fc2/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/fc2/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/final_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/final_layer_norm/gamma:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/k_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/k_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/out_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/out_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/q_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/q_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/v_proj/bias:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn/v_proj/kernel:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn_layer_norm/beta:0": "tf_model-00002-of-00007.h5", "tfopt_for_causal_lm_1/model/decoder/layers.9/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00007.h5" } }