diff --git "a/tf_model.h5.index.json" "b/tf_model.h5.index.json" new file mode 100644--- /dev/null +++ "b/tf_model.h5.index.json" @@ -0,0 +1,1035 @@ +{ + "metadata": { + "total_size": 131439403008 + }, + "weight_map": { + "tfopt_for_causal_lm/model/decoder/embed_positions/weight:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/embed_tokens/weight:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/final_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/final_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/fc1/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/fc1/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/fc2/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/fc2/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/final_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/final_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/k_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/k_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/out_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/out_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/q_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/q_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/v_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn/v_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.0/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/fc1/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/fc1/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/fc2/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/fc2/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/final_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/final_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/k_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/k_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/out_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/out_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/q_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/q_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/v_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn/v_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.1/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/fc1/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/fc1/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/fc2/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/fc2/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/final_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/final_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/k_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/k_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/out_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/out_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/q_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/q_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/v_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn/v_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.10/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/fc1/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/fc1/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/fc2/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/fc2/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/final_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/final_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/k_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/k_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/out_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/out_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/q_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/q_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/v_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn/v_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.11/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/fc1/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/fc1/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/fc2/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/fc2/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/final_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/final_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/k_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/k_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/out_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/out_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/q_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/q_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/v_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn/v_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.12/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/fc1/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/fc1/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/fc2/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/fc2/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/final_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/final_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/k_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/k_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/out_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/out_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/q_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/q_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/v_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn/v_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.13/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/fc1/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/fc1/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/fc2/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/fc2/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/final_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/final_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/k_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/k_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/out_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/out_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/q_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/q_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/v_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn/v_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.14/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/fc1/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/fc1/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/fc2/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/fc2/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/final_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/final_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/k_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/k_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/out_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/out_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/q_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/q_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/v_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn/v_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.15/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/fc1/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/fc1/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/fc2/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/fc2/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/final_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/final_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/k_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/k_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/out_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/out_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/q_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/q_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/v_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn/v_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.16/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/fc1/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/fc1/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/fc2/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/fc2/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/final_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/final_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/k_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/k_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/out_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/out_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/q_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/q_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/v_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn/v_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.17/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/fc1/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/fc1/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/fc2/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/fc2/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/final_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/final_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/k_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/k_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/out_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/out_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/q_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/q_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/v_proj/bias:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn/v_proj/kernel:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn_layer_norm/beta:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.18/self_attn_layer_norm/gamma:0": "tf_model-00004-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/fc1/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/fc1/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/fc2/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/fc2/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/final_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/final_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/k_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/k_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/out_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/out_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/q_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/q_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/v_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn/v_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.19/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/fc1/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/fc1/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/fc2/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/fc2/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/final_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/final_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/k_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/k_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/out_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/out_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/q_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/q_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/v_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn/v_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.2/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/fc1/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/fc1/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/fc2/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/fc2/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/final_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/final_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/k_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/k_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/out_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/out_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/q_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/q_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/v_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn/v_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.20/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/fc1/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/fc1/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/fc2/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/fc2/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/final_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/final_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/k_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/k_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/out_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/out_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/q_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/q_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/v_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn/v_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.21/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/fc1/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/fc1/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/fc2/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/fc2/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/final_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/final_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/k_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/k_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/out_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/out_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/q_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/q_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/v_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn/v_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.22/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/fc1/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/fc1/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/fc2/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/fc2/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/final_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/final_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/k_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/k_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/out_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/out_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/q_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/q_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/v_proj/bias:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn/v_proj/kernel:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn_layer_norm/beta:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.23/self_attn_layer_norm/gamma:0": "tf_model-00005-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/fc1/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/fc1/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/fc2/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/fc2/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/final_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/final_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/k_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/k_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/out_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/out_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/q_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/q_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/v_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn/v_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.24/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/fc1/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/fc1/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/fc2/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/fc2/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/final_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/final_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/k_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/k_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/out_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/out_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/q_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/q_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/v_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn/v_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.25/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/fc1/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/fc1/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/fc2/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/fc2/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/final_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/final_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/k_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/k_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/out_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/out_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/q_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/q_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/v_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn/v_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.26/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/fc1/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/fc1/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/fc2/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/fc2/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/final_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/final_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/k_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/k_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/out_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/out_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/q_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/q_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/v_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn/v_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn_layer_norm/beta:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.27/self_attn_layer_norm/gamma:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/fc1/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/fc1/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/fc2/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/fc2/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/final_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/final_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/k_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/k_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/out_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/out_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/q_proj/bias:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/q_proj/kernel:0": "tf_model-00006-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/v_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn/v_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.28/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/fc1/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/fc1/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/fc2/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/fc2/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/final_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/final_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/k_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/k_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/out_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/out_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/q_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/q_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/v_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn/v_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.29/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/fc1/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/fc1/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/fc2/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/fc2/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/final_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/final_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/k_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/k_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/out_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/out_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/q_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/q_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/v_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn/v_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.3/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/fc1/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/fc1/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/fc2/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/fc2/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/final_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/final_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/k_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/k_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/out_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/out_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/q_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/q_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/v_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn/v_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.30/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/fc1/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/fc1/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/fc2/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/fc2/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/final_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/final_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/k_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/k_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/out_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/out_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/q_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/q_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/v_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn/v_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.31/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/fc1/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/fc1/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/fc2/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/fc2/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/final_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/final_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/k_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/k_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/out_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/out_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/q_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/q_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/v_proj/bias:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn/v_proj/kernel:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn_layer_norm/beta:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.32/self_attn_layer_norm/gamma:0": "tf_model-00007-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/fc1/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/fc1/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/fc2/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/fc2/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/final_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/final_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/k_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/k_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/out_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/out_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/q_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/q_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/v_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn/v_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.33/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/fc1/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/fc1/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/fc2/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/fc2/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/final_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/final_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/k_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/k_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/out_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/out_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/q_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/q_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/v_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn/v_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.34/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/fc1/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/fc1/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/fc2/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/fc2/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/final_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/final_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/k_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/k_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/out_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/out_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/q_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/q_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/v_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn/v_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.35/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/fc1/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/fc1/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/fc2/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/fc2/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/final_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/final_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/k_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/k_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/out_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/out_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/q_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/q_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/v_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn/v_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.36/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/fc1/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/fc1/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/fc2/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/fc2/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/final_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/final_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/k_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/k_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/out_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/out_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/q_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/q_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/v_proj/bias:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn/v_proj/kernel:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn_layer_norm/beta:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.37/self_attn_layer_norm/gamma:0": "tf_model-00008-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/fc1/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/fc1/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/fc2/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/fc2/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/final_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/final_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/k_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/k_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/out_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/out_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/q_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/q_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/v_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn/v_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.38/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/fc1/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/fc1/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/fc2/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/fc2/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/final_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/final_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/k_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/k_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/out_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/out_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/q_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/q_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/v_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn/v_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.39/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/fc1/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/fc1/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/fc2/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/fc2/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/final_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/final_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/k_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/k_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/out_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/out_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/q_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/q_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/v_proj/bias:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn/v_proj/kernel:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn_layer_norm/beta:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.4/self_attn_layer_norm/gamma:0": "tf_model-00001-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/fc1/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/fc1/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/fc2/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/fc2/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/final_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/final_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/k_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/k_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/out_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/out_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/q_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/q_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/v_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn/v_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.40/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/fc1/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/fc1/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/fc2/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/fc2/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/final_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/final_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/k_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/k_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/out_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/out_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/q_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/q_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/v_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn/v_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.41/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/fc1/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/fc1/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/fc2/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/fc2/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/final_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/final_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/k_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/k_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/out_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/out_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/q_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/q_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/v_proj/bias:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn/v_proj/kernel:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn_layer_norm/beta:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.42/self_attn_layer_norm/gamma:0": "tf_model-00009-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/fc1/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/fc1/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/fc2/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/fc2/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/final_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/final_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/k_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/k_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/out_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/out_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/q_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/q_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/v_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn/v_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.43/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/fc1/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/fc1/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/fc2/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/fc2/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/final_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/final_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/k_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/k_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/out_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/out_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/q_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/q_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/v_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn/v_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.44/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/fc1/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/fc1/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/fc2/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/fc2/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/final_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/final_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/k_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/k_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/out_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/out_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/q_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/q_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/v_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn/v_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.45/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/fc1/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/fc1/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/fc2/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/fc2/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/final_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/final_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/k_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/k_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/out_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/out_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/q_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/q_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/v_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn/v_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn_layer_norm/beta:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.46/self_attn_layer_norm/gamma:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/fc1/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/fc1/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/fc2/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/fc2/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/final_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/final_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/k_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/k_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/out_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/out_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/q_proj/bias:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/q_proj/kernel:0": "tf_model-00010-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/v_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn/v_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.47/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/fc1/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/fc1/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/fc2/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/fc2/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/final_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/final_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/k_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/k_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/out_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/out_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/q_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/q_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/v_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn/v_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.48/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/fc1/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/fc1/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/fc2/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/fc2/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/final_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/final_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/k_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/k_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/out_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/out_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/q_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/q_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/v_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn/v_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.49/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/fc1/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/fc1/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/fc2/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/fc2/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/final_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/final_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/k_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/k_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/out_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/out_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/q_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/q_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/v_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn/v_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.5/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/fc1/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/fc1/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/fc2/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/fc2/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/final_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/final_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/k_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/k_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/out_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/out_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/q_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/q_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/v_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn/v_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.50/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/fc1/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/fc1/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/fc2/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/fc2/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/final_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/final_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/k_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/k_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/out_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/out_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/q_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/q_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/v_proj/bias:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn/v_proj/kernel:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn_layer_norm/beta:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.51/self_attn_layer_norm/gamma:0": "tf_model-00011-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/fc1/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/fc1/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/fc2/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/fc2/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/final_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/final_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/k_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/k_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/out_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/out_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/q_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/q_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/v_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn/v_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.52/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/fc1/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/fc1/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/fc2/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/fc2/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/final_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/final_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/k_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/k_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/out_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/out_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/q_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/q_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/v_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn/v_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.53/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/fc1/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/fc1/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/fc2/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/fc2/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/final_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/final_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/k_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/k_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/out_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/out_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/q_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/q_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/v_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn/v_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.54/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/fc1/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/fc1/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/fc2/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/fc2/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/final_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/final_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/k_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/k_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/out_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/out_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/q_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/q_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/v_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn/v_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.55/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/fc1/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/fc1/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/fc2/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/fc2/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/final_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/final_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/k_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/k_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/out_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/out_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/q_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/q_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/v_proj/bias:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn/v_proj/kernel:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn_layer_norm/beta:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.56/self_attn_layer_norm/gamma:0": "tf_model-00012-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/fc1/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/fc1/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/fc2/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/fc2/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/final_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/final_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/k_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/k_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/out_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/out_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/q_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/q_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/v_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn/v_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.57/self_attn_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/fc1/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/fc1/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/fc2/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/fc2/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/final_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/final_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/k_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/k_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/out_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/out_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/q_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/q_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/v_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn/v_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.58/self_attn_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/fc1/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/fc1/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/fc2/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/fc2/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/final_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/final_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/k_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/k_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/out_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/out_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/q_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/q_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/v_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn/v_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.59/self_attn_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/fc1/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/fc1/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/fc2/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/fc2/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/final_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/final_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/k_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/k_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/out_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/out_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/q_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/q_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/v_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn/v_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.6/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/fc1/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/fc1/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/fc2/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/fc2/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/final_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/final_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/k_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/k_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/out_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/out_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/q_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/q_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/v_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn/v_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.60/self_attn_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/fc1/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/fc1/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/fc2/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/fc2/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/final_layer_norm/beta:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/final_layer_norm/gamma:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/k_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/k_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/out_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/out_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/q_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/q_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/v_proj/bias:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn/v_proj/kernel:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn_layer_norm/beta:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.61/self_attn_layer_norm/gamma:0": "tf_model-00013-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/fc1/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/fc1/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/fc2/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/fc2/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/final_layer_norm/beta:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/final_layer_norm/gamma:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/k_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/k_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/out_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/out_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/q_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/q_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/v_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn/v_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn_layer_norm/beta:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.62/self_attn_layer_norm/gamma:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/fc1/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/fc1/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/fc2/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/fc2/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/final_layer_norm/beta:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/final_layer_norm/gamma:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/k_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/k_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/out_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/out_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/q_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/q_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/v_proj/bias:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn/v_proj/kernel:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn_layer_norm/beta:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.63/self_attn_layer_norm/gamma:0": "tf_model-00014-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/fc1/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/fc1/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/fc2/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/fc2/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/final_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/final_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/k_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/k_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/out_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/out_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/q_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/q_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/v_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn/v_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.7/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/fc1/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/fc1/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/fc2/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/fc2/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/final_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/final_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/k_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/k_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/out_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/out_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/q_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/q_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/v_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn/v_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn_layer_norm/beta:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.8/self_attn_layer_norm/gamma:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/fc1/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/fc1/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/fc2/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/fc2/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/final_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/final_layer_norm/gamma:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/k_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/k_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/out_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/out_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/q_proj/bias:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/q_proj/kernel:0": "tf_model-00002-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/v_proj/bias:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn/v_proj/kernel:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn_layer_norm/beta:0": "tf_model-00003-of-00014.h5", + "tfopt_for_causal_lm/model/decoder/layers.9/self_attn_layer_norm/gamma:0": "tf_model-00003-of-00014.h5" + } +}