diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..92efe92c64d7d5491ad84d0382a92116788ecfec 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,293 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +decoder.embed_positions.weight filter=lfs diff=lfs merge=lfs -text +decoder.embed_tokens.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.-1.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.0.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.0.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.0.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.0.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.0.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.0.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.1.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.1.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.1.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.1.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.1.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.1.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.10.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.10.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.10.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.10.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.10.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.10.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.11.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.11.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.11.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.11.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.11.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.11.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.12.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.12.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.12.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.12.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.12.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.12.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.13.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.13.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.13.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.13.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.13.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.13.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.14.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.14.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.14.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.14.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.14.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.14.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.15.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.15.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.15.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.15.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.15.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.15.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.16.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.16.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.16.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.16.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.16.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.16.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.17.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.17.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.17.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.17.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.17.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.17.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.18.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.18.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.18.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.18.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.18.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.18.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.19.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.19.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.19.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.19.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.19.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.19.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.2.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.2.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.2.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.2.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.2.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.2.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.20.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.20.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.20.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.20.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.20.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.20.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.21.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.21.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.21.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.21.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.21.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.21.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.22.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.22.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.22.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.22.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.22.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.22.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.23.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.23.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.23.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.23.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.23.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.23.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.24.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.24.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.24.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.24.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.24.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.24.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.25.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.25.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.25.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.25.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.25.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.25.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.26.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.26.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.26.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.26.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.26.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.26.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.27.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.27.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.27.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.27.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.27.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.27.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.28.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.28.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.28.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.28.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.28.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.28.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.29.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.29.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.29.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.29.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.29.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.29.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.3.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.3.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.3.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.3.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.3.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.3.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.30.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.30.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.30.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.30.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.30.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.30.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.31.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.31.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.31.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.31.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.31.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.31.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.32.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.32.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.32.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.32.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.32.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.32.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.33.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.33.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.33.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.33.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.33.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.33.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.34.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.34.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.34.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.34.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.34.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.34.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.35.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.35.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.35.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.35.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.35.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.35.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.36.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.36.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.36.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.36.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.36.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.36.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.37.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.37.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.37.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.37.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.37.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.37.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.38.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.38.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.38.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.38.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.38.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.38.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.39.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.39.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.39.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.39.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.39.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.39.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.4.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.4.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.4.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.4.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.4.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.4.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.40.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.40.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.40.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.40.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.40.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.40.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.41.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.41.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.41.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.41.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.41.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.41.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.42.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.42.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.42.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.42.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.42.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.42.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.43.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.43.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.43.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.43.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.43.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.43.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.44.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.44.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.44.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.44.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.44.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.44.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.45.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.45.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.45.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.45.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.45.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.45.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.46.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.46.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.46.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.46.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.46.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.46.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.47.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.47.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.47.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.47.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.47.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.5.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.5.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.5.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.5.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.5.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.5.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.6.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.6.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.6.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.6.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.6.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.6.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.7.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.7.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.7.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.7.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.7.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.7.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.8.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.8.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.8.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.8.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.8.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.8.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text +decoder.layers.9.attn-head-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.9.attn.mlp-sparsity-predictor.1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.9.attn.mlp-sparsity-predictor.2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.9.fc1.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.9.fc2.weight filter=lfs diff=lfs merge=lfs -text +decoder.layers.9.self_attn.catted_head_weights filter=lfs diff=lfs merge=lfs -text diff --git a/decoder.embed_positions.weight b/decoder.embed_positions.weight new file mode 100644 index 0000000000000000000000000000000000000000..f3ea93bfbfd9cd6e272febb1cfb9e7fd10daba03 --- /dev/null +++ b/decoder.embed_positions.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf00ad035ead79d2ed48ec3d522561ef720f15bffa949e4bb19a9113f3c5942 +size 29388800 diff --git a/decoder.embed_tokens.weight b/decoder.embed_tokens.weight new file mode 100644 index 0000000000000000000000000000000000000000..3e0d8a956112dfa3990c2464036583a86c8b3768 --- /dev/null +++ b/decoder.embed_tokens.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:476ae65e6101e387c458e4f97bb25b202a40c544be169b01f1f3c992c2bce87f +size 720699392 diff --git a/decoder.final_layer_norm.bias b/decoder.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..98fd8c39c90b3b2adfa75a761369d74774305862 Binary files /dev/null and b/decoder.final_layer_norm.bias differ diff --git a/decoder.final_layer_norm.weight b/decoder.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.final_layer_norm.weight differ diff --git a/decoder.layers.-1.attn-head-sparsity-predictor.1.weight b/decoder.layers.-1.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..31db15a0e9815e720042037020f57414a3967c32 --- /dev/null +++ b/decoder.layers.-1.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0367c5f848f51d25dedf960ddb2c1b77f64cdb5de0a84b450ec72877eca7839b +size 28673513 diff --git a/decoder.layers.-1.attn-head-sparsity-predictor.2.weight b/decoder.layers.-1.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7d5c0f3b3d02c5bfe25ceb42b442b21285e51d78 Binary files /dev/null and b/decoder.layers.-1.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.0.attn-head-sparsity-predictor.1.weight b/decoder.layers.0.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..e47855877324e21bbedbbeceac43268982256057 --- /dev/null +++ b/decoder.layers.0.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a22abccbac13fa4eedebdc25223ee424dece7854b1ce2d5ceaecdd2ec382b7a +size 28673508 diff --git a/decoder.layers.0.attn-head-sparsity-predictor.2.weight b/decoder.layers.0.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..60ece9e1328fc1fe3ab1a888878ed2089eb6265f Binary files /dev/null and b/decoder.layers.0.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.0.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.0.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..0a99ae9e0762ee571d7d3a8eda69fc06c9e1bb17 --- /dev/null +++ b/decoder.layers.0.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57417f7b96f250b51647ecaa6c3d3f948d48b1b3943f33dac4a352b177addb45 +size 28673334 diff --git a/decoder.layers.0.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.0.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..3ab055ee347035c42d68ebf1f8d176a455f3a579 --- /dev/null +++ b/decoder.layers.0.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51abfab2cdb85d3f751b3ce6c5d0208e0be24a113a92e9c3c6cc5fae8d64b9e +size 114689334 diff --git a/decoder.layers.0.fc1.bias b/decoder.layers.0.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..f6fdd26038f8400f77f504548b9522ddb6929434 Binary files /dev/null and b/decoder.layers.0.fc1.bias differ diff --git a/decoder.layers.0.fc1.weight b/decoder.layers.0.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..6778eef6f5398654d7183db6c5418173829b705c --- /dev/null +++ b/decoder.layers.0.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7be7e5e24f7f51375dacf99b97f45795e1e16272def0f0b4291ca4e8ef50047 +size 411041792 diff --git a/decoder.layers.0.fc2.bias b/decoder.layers.0.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..6a206c96b95ad417fde490fa829ee5c0cf068522 Binary files /dev/null and b/decoder.layers.0.fc2.bias differ diff --git a/decoder.layers.0.fc2.weight b/decoder.layers.0.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..ec13ac20f08088a875d00f76e6685c2aa8b3d416 --- /dev/null +++ b/decoder.layers.0.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0043b590c01d29da4d352510110585a1ab32a1a964cfbd9a64c08facf5c323bc +size 411041792 diff --git a/decoder.layers.0.final_layer_norm.bias b/decoder.layers.0.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..9e20480f45c2281c3058d3aa32cf245310d7f88d Binary files /dev/null and b/decoder.layers.0.final_layer_norm.bias differ diff --git a/decoder.layers.0.final_layer_norm.weight b/decoder.layers.0.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.0.final_layer_norm.weight differ diff --git a/decoder.layers.0.self_attn.catted_head_biases b/decoder.layers.0.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..2f07b32de1e0f1e422aeae4cf30e9340e6268227 Binary files /dev/null and b/decoder.layers.0.self_attn.catted_head_biases differ diff --git a/decoder.layers.0.self_attn.catted_head_weights b/decoder.layers.0.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..e5f443c609c58f9aebe3f1dea3c88344523bb51a --- /dev/null +++ b/decoder.layers.0.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5941fc85b3da59c91af7f8337cc9f3646c451b562433adda2982aa8448ea4534 +size 411041792 diff --git a/decoder.layers.0.self_attn.out_proj.bias b/decoder.layers.0.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..3087ed195aefbcb838be78c8a9626c0c10ad50ac Binary files /dev/null and b/decoder.layers.0.self_attn.out_proj.bias differ diff --git a/decoder.layers.0.self_attn_layer_norm.bias b/decoder.layers.0.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..6acb26b30862c4fd74b2887dea99e6760529437d Binary files /dev/null and b/decoder.layers.0.self_attn_layer_norm.bias differ diff --git a/decoder.layers.0.self_attn_layer_norm.weight b/decoder.layers.0.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.0.self_attn_layer_norm.weight differ diff --git a/decoder.layers.1.attn-head-sparsity-predictor.1.weight b/decoder.layers.1.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..6f1f226f205e10d28cf719aa92c137b89137acef --- /dev/null +++ b/decoder.layers.1.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d21336cb726e7b71c67fef9ea1172dc8ce7a07e8259c8c4a6430d8d7b7fb71b +size 28673508 diff --git a/decoder.layers.1.attn-head-sparsity-predictor.2.weight b/decoder.layers.1.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..60d0749c286ca05eae56545f310eadc8898aa489 Binary files /dev/null and b/decoder.layers.1.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.1.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.1.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..14ad4ecd8d43756636b5ea89d7185acba7c3041d --- /dev/null +++ b/decoder.layers.1.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219e2ac10da9e400afcbe461b0f9e08060fe9c2853033e98bdab2c73477ff2fd +size 28673503 diff --git a/decoder.layers.1.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.1.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7eb664ae0ebc7fc38c7e71ad023d1a921668c4ee --- /dev/null +++ b/decoder.layers.1.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc71c4d33a69bb53d0184d02c2fe7770d40376b779ade6dbc91792399c0fdf8d +size 114689503 diff --git a/decoder.layers.1.fc1.bias b/decoder.layers.1.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..06a7b63cf45db50156cf8910e80081801626622a Binary files /dev/null and b/decoder.layers.1.fc1.bias differ diff --git a/decoder.layers.1.fc1.weight b/decoder.layers.1.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..7fb1801f647cd26acb5764ad7b0e902be22194be --- /dev/null +++ b/decoder.layers.1.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d7617e85bf07bce9dea5dd02c24350cd7bc817efb8d09d693de879ef0821fa +size 411041792 diff --git a/decoder.layers.1.fc2.bias b/decoder.layers.1.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..88b21e1fe0438857029c178b0f64aa8de9decdd1 Binary files /dev/null and b/decoder.layers.1.fc2.bias differ diff --git a/decoder.layers.1.fc2.weight b/decoder.layers.1.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..f642e676c1b9e923a3943b1ed01ab2d71c0e32f6 --- /dev/null +++ b/decoder.layers.1.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff95a862905f51cff395770d52d8d4d0749616038faee552f0c2470a843aa3bc +size 411041792 diff --git a/decoder.layers.1.final_layer_norm.bias b/decoder.layers.1.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..f4a8a5d882005e2cb0d3787e0505e2b3e3cc9c62 Binary files /dev/null and b/decoder.layers.1.final_layer_norm.bias differ diff --git a/decoder.layers.1.final_layer_norm.weight b/decoder.layers.1.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.1.final_layer_norm.weight differ diff --git a/decoder.layers.1.self_attn.catted_head_biases b/decoder.layers.1.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..b547ee91bcfa4714f4e418dd89c9cce129f068a2 Binary files /dev/null and b/decoder.layers.1.self_attn.catted_head_biases differ diff --git a/decoder.layers.1.self_attn.catted_head_weights b/decoder.layers.1.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..959e1da3c182e3caeabe4000178afc2888b6d207 --- /dev/null +++ b/decoder.layers.1.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:268f002e7b1eb3fabd1767ce104ff449e492936658cc6e7c19a038262f30b5bf +size 411041792 diff --git a/decoder.layers.1.self_attn.out_proj.bias b/decoder.layers.1.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..67b5bb5b37657af2c5e7dcdb1c936729e179f50b Binary files /dev/null and b/decoder.layers.1.self_attn.out_proj.bias differ diff --git a/decoder.layers.1.self_attn_layer_norm.bias b/decoder.layers.1.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..bc750f44288baabfebd76675006678f7bb5edf50 Binary files /dev/null and b/decoder.layers.1.self_attn_layer_norm.bias differ diff --git a/decoder.layers.1.self_attn_layer_norm.weight b/decoder.layers.1.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.1.self_attn_layer_norm.weight differ diff --git a/decoder.layers.10.attn-head-sparsity-predictor.1.weight b/decoder.layers.10.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..06a1e33883c81457f90c2e2a17812456cc033e0b --- /dev/null +++ b/decoder.layers.10.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e828b35fe897e574c3c400bdbcf4c0cbb282be057cdd855367dab21ab073b226 +size 28673513 diff --git a/decoder.layers.10.attn-head-sparsity-predictor.2.weight b/decoder.layers.10.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..bcdb66742a22026fc30ca883df57cbd33b27d291 Binary files /dev/null and b/decoder.layers.10.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.10.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.10.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..dce3c5f74356373bc8d6bce3a8f10052a5cb5aa3 --- /dev/null +++ b/decoder.layers.10.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8db4523cba617e4b78d1d9e3c900da5e8999c4a3d70650755562a590adae391 +size 28673508 diff --git a/decoder.layers.10.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.10.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c7c52900cf6a7216be57e2b70046b98d3b16c0e9 --- /dev/null +++ b/decoder.layers.10.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e295828515f15e474bf68db2fea679dd9fb0e30b2f2a7d51da9676c6d292f5 +size 114689508 diff --git a/decoder.layers.10.fc1.bias b/decoder.layers.10.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..d210c854bbb754a04ab4485f2ec61ae2a4fceb8d Binary files /dev/null and b/decoder.layers.10.fc1.bias differ diff --git a/decoder.layers.10.fc1.weight b/decoder.layers.10.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b869cb9803be21302533c7954ad4e81ed1a8dc1b --- /dev/null +++ b/decoder.layers.10.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4836d9b2b382d49cad6e60922c107cc1cfab8f0b17dba5e9326ecf08f4002c +size 411041792 diff --git a/decoder.layers.10.fc2.bias b/decoder.layers.10.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..972fa33e1ce089a71405f9386464faf3265c97e3 Binary files /dev/null and b/decoder.layers.10.fc2.bias differ diff --git a/decoder.layers.10.fc2.weight b/decoder.layers.10.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..175f5196486607334a5124bda17dc18199d23943 --- /dev/null +++ b/decoder.layers.10.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efddc1e98aa94538dcd9b047f840f825b240b956c2a2f0c4d057c82f8f6e3c28 +size 411041792 diff --git a/decoder.layers.10.final_layer_norm.bias b/decoder.layers.10.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..9265735687093547144fe8ee64a96113747062b2 Binary files /dev/null and b/decoder.layers.10.final_layer_norm.bias differ diff --git a/decoder.layers.10.final_layer_norm.weight b/decoder.layers.10.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.10.final_layer_norm.weight differ diff --git a/decoder.layers.10.self_attn.catted_head_biases b/decoder.layers.10.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..11f7bb07abf5637c9e611557e7195c07d0859128 Binary files /dev/null and b/decoder.layers.10.self_attn.catted_head_biases differ diff --git a/decoder.layers.10.self_attn.catted_head_weights b/decoder.layers.10.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..c58dd7af701a4e8b60fc28aa071bee641220a3b1 --- /dev/null +++ b/decoder.layers.10.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68858918d1d1f339ce7c211a973cb84b42e7b53fdc4e9f6401ac6f3f6372d4d7 +size 411041792 diff --git a/decoder.layers.10.self_attn.out_proj.bias b/decoder.layers.10.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..d75c3a03a6612140af37adf6bad4637b055dfae8 Binary files /dev/null and b/decoder.layers.10.self_attn.out_proj.bias differ diff --git a/decoder.layers.10.self_attn_layer_norm.bias b/decoder.layers.10.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..94a6edf2e2128efcbb5359199a9cac03b4c01f88 Binary files /dev/null and b/decoder.layers.10.self_attn_layer_norm.bias differ diff --git a/decoder.layers.10.self_attn_layer_norm.weight b/decoder.layers.10.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.10.self_attn_layer_norm.weight differ diff --git a/decoder.layers.11.attn-head-sparsity-predictor.1.weight b/decoder.layers.11.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..dbf63dd574620a2745b8ebdf3a76581193a50875 --- /dev/null +++ b/decoder.layers.11.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca52e176ddb1ec46f062304916c3465d4ebfeb94d18a8f4da100b5677c7f6c87 +size 28673513 diff --git a/decoder.layers.11.attn-head-sparsity-predictor.2.weight b/decoder.layers.11.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7e8e564b611267aa559aed5d624520a943639163 Binary files /dev/null and b/decoder.layers.11.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.11.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.11.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..6bf33252f3c9b70904081d42f7f22c7f54999d18 --- /dev/null +++ b/decoder.layers.11.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac21e967da02258e93d4d4230bfe95b333ce3d59c9b2b69eaebd7400f030c6 +size 28673508 diff --git a/decoder.layers.11.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.11.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..3ae3ad6f66ee037d7f8adf7fb4ae5932cb604470 --- /dev/null +++ b/decoder.layers.11.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f83ea1db3d4b96b0f27e8c3499319e45b70ecdd8eb1b0184865d7b04507ec8 +size 114689508 diff --git a/decoder.layers.11.fc1.bias b/decoder.layers.11.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..f6826cc06da5fe35ea2a05aa34883e823e7bbba2 Binary files /dev/null and b/decoder.layers.11.fc1.bias differ diff --git a/decoder.layers.11.fc1.weight b/decoder.layers.11.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..4c817fa57b833eb0b81131b57feb8de6e3ef8cfb --- /dev/null +++ b/decoder.layers.11.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7425e01ef09c6af95315d1e4c2deeba50efb0f904228fc2141d7c53fbea81be +size 411041792 diff --git a/decoder.layers.11.fc2.bias b/decoder.layers.11.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..c60fbbfecd4362a5186642e63715397db80a7624 Binary files /dev/null and b/decoder.layers.11.fc2.bias differ diff --git a/decoder.layers.11.fc2.weight b/decoder.layers.11.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..307d3f940eb0e008e5de23691b3d6c9f1714ac46 --- /dev/null +++ b/decoder.layers.11.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:571a1512b12abd0ef81b0aced263a5a6aa2cda0b22306f5fc28358ac124faf67 +size 411041792 diff --git a/decoder.layers.11.final_layer_norm.bias b/decoder.layers.11.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..72bc82f69d524e0c8048e7fd405735f81ffd2abd Binary files /dev/null and b/decoder.layers.11.final_layer_norm.bias differ diff --git a/decoder.layers.11.final_layer_norm.weight b/decoder.layers.11.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.11.final_layer_norm.weight differ diff --git a/decoder.layers.11.self_attn.catted_head_biases b/decoder.layers.11.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..8ba2d3a2ecfc124f7331cc813a1b1fa52317818f Binary files /dev/null and b/decoder.layers.11.self_attn.catted_head_biases differ diff --git a/decoder.layers.11.self_attn.catted_head_weights b/decoder.layers.11.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..319a76be1077ad2635497a992ad232a153526cd4 --- /dev/null +++ b/decoder.layers.11.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d99958e771706488aaf8d1a5899f30782cfdb016a7fb38d4052405983967f75f +size 411041792 diff --git a/decoder.layers.11.self_attn.out_proj.bias b/decoder.layers.11.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..c2b7c77b47bc7cac68d09dd8eb06e3bb9f57302b Binary files /dev/null and b/decoder.layers.11.self_attn.out_proj.bias differ diff --git a/decoder.layers.11.self_attn_layer_norm.bias b/decoder.layers.11.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..da11414d31e90697897faa500a436ceb0a5dd415 Binary files /dev/null and b/decoder.layers.11.self_attn_layer_norm.bias differ diff --git a/decoder.layers.11.self_attn_layer_norm.weight b/decoder.layers.11.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.11.self_attn_layer_norm.weight differ diff --git a/decoder.layers.12.attn-head-sparsity-predictor.1.weight b/decoder.layers.12.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..61d4799a0121f1d9d7bcd8b0db1d954f51f149e1 --- /dev/null +++ b/decoder.layers.12.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2225333072291e18eb47176d0dfdebf61a51cc323b994beaf5e414499f88d66 +size 28673513 diff --git a/decoder.layers.12.attn-head-sparsity-predictor.2.weight b/decoder.layers.12.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..844c32792f20449d8449b3fc810bce61cdef9f98 Binary files /dev/null and b/decoder.layers.12.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.12.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.12.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..1c04a7014b11b11c22f131a53728d03221d20040 --- /dev/null +++ b/decoder.layers.12.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22cdf78ec9711517c73f81c0e00377914b1ecb2b6e769aafb8eb74192d0c5997 +size 28673508 diff --git a/decoder.layers.12.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.12.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..5f22a1f33c7907f145e230162d495c79680732f7 --- /dev/null +++ b/decoder.layers.12.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510cf8b773b6a299d45e3a56bcefc422c5bb8a4693fa65b15be58be226dd6bef +size 114689508 diff --git a/decoder.layers.12.fc1.bias b/decoder.layers.12.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..63f3d9d4cb558edaf8b9373e7226537a48c5573d Binary files /dev/null and b/decoder.layers.12.fc1.bias differ diff --git a/decoder.layers.12.fc1.weight b/decoder.layers.12.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..df586534f970158567bdb1be9b596ddba0c21df4 --- /dev/null +++ b/decoder.layers.12.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac4fa7babf5b44c5e3b4c6b9b1b79fe21367473c290137b5a324722ff6b0b592 +size 411041792 diff --git a/decoder.layers.12.fc2.bias b/decoder.layers.12.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..9a0557e597ac503223df4bf7e4c6bd3f1aa401ec Binary files /dev/null and b/decoder.layers.12.fc2.bias differ diff --git a/decoder.layers.12.fc2.weight b/decoder.layers.12.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..8deccfc64ee79f66ad109004ff3bb720745a6db5 --- /dev/null +++ b/decoder.layers.12.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c064a051a9efbb54e43c9be42d4e49e9c4f4a89dd4f4bc4fb1ba98bf118e112 +size 411041792 diff --git a/decoder.layers.12.final_layer_norm.bias b/decoder.layers.12.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..052f12a5c3c64038c24452ef2da31723ae1df3f0 Binary files /dev/null and b/decoder.layers.12.final_layer_norm.bias differ diff --git a/decoder.layers.12.final_layer_norm.weight b/decoder.layers.12.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.12.final_layer_norm.weight differ diff --git a/decoder.layers.12.self_attn.catted_head_biases b/decoder.layers.12.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..cf148874fddd6fe29447c5d7dce433e4989938d6 Binary files /dev/null and b/decoder.layers.12.self_attn.catted_head_biases differ diff --git a/decoder.layers.12.self_attn.catted_head_weights b/decoder.layers.12.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..08de72aff355d7f66bfe7edb8773d0fcc4e4cf3c --- /dev/null +++ b/decoder.layers.12.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3970de58fe9e06b837e56eb7626ffd47e12e7c8ad3c40e5bbbf3dc575fcc28bb +size 411041792 diff --git a/decoder.layers.12.self_attn.out_proj.bias b/decoder.layers.12.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..f90b53a3ef8843b229d62431ce31545423be4e1f Binary files /dev/null and b/decoder.layers.12.self_attn.out_proj.bias differ diff --git a/decoder.layers.12.self_attn_layer_norm.bias b/decoder.layers.12.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..637172c1dcd179d7fa3436250e58e77d304a46f8 Binary files /dev/null and b/decoder.layers.12.self_attn_layer_norm.bias differ diff --git a/decoder.layers.12.self_attn_layer_norm.weight b/decoder.layers.12.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.12.self_attn_layer_norm.weight differ diff --git a/decoder.layers.13.attn-head-sparsity-predictor.1.weight b/decoder.layers.13.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..6fa483fe11f58780442033202760754b4bb0d372 --- /dev/null +++ b/decoder.layers.13.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:718d41565ec8c6392f6bb89d35961226e5b70557b6857a222e65a7a2ee9f886a +size 28673513 diff --git a/decoder.layers.13.attn-head-sparsity-predictor.2.weight b/decoder.layers.13.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7a3b351623d1c3db2d021f3c62a400912644c940 Binary files /dev/null and b/decoder.layers.13.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.13.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.13.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d2ef46b1d6aa75e3504a3b5b555a04bea3ee4ded --- /dev/null +++ b/decoder.layers.13.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb166989b5a98721e102ced166e04573f3d21d5b63f132f114e7c660b749d667 +size 28673508 diff --git a/decoder.layers.13.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.13.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9253fa9dad27a107379b11b324fb502783f9f5eb --- /dev/null +++ b/decoder.layers.13.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e69c5ee63254dc4577c7c2b9fb9e7e568007c7d996fc120c8a9929eae0e8682 +size 114689508 diff --git a/decoder.layers.13.fc1.bias b/decoder.layers.13.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..a9413c36b2b19d838364efbf64a89e703ba20bbe Binary files /dev/null and b/decoder.layers.13.fc1.bias differ diff --git a/decoder.layers.13.fc1.weight b/decoder.layers.13.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..936f9c1ab5bdcdbd3ab7af38a1902be5be133b92 --- /dev/null +++ b/decoder.layers.13.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb7553f64326564a76bbe5c3eac1d84e21cc561df6c422d3880de37f3aa95be +size 411041792 diff --git a/decoder.layers.13.fc2.bias b/decoder.layers.13.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..dcf043cfeeb267266c3648f9a7671bdd3bbe7595 Binary files /dev/null and b/decoder.layers.13.fc2.bias differ diff --git a/decoder.layers.13.fc2.weight b/decoder.layers.13.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..0e58fa03ec1fcc91bcc8492b3cfa7453866e3b4f --- /dev/null +++ b/decoder.layers.13.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c3cd7fa774a9d263c95ad5c39f398b6fd15ecb21688ec15c4617e8889ec954 +size 411041792 diff --git a/decoder.layers.13.final_layer_norm.bias b/decoder.layers.13.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..8edf1649f51fa36bf076ddf6cfa1155e5f2f71d4 Binary files /dev/null and b/decoder.layers.13.final_layer_norm.bias differ diff --git a/decoder.layers.13.final_layer_norm.weight b/decoder.layers.13.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.13.final_layer_norm.weight differ diff --git a/decoder.layers.13.self_attn.catted_head_biases b/decoder.layers.13.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..af0d4fb694dc373aed5e7af2d9fb09c26f0859dc Binary files /dev/null and b/decoder.layers.13.self_attn.catted_head_biases differ diff --git a/decoder.layers.13.self_attn.catted_head_weights b/decoder.layers.13.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..76e668b1396899d5f9d3a7f6fdad777d6d986346 --- /dev/null +++ b/decoder.layers.13.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14dda2560255ee79dbca6b16542cc0c002cb0a799c52fe75298f523fa61384b +size 411041792 diff --git a/decoder.layers.13.self_attn.out_proj.bias b/decoder.layers.13.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..7be470909488694e47c9e323862a4c138483d4f7 Binary files /dev/null and b/decoder.layers.13.self_attn.out_proj.bias differ diff --git a/decoder.layers.13.self_attn_layer_norm.bias b/decoder.layers.13.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..177b7b7aee6e85e3a1bd27a5bfa0845a78be2b48 Binary files /dev/null and b/decoder.layers.13.self_attn_layer_norm.bias differ diff --git a/decoder.layers.13.self_attn_layer_norm.weight b/decoder.layers.13.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.13.self_attn_layer_norm.weight differ diff --git a/decoder.layers.14.attn-head-sparsity-predictor.1.weight b/decoder.layers.14.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..6f9f1983fcf20176b7361c970a3246fa07ad0b4e --- /dev/null +++ b/decoder.layers.14.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2942ade9090a3e26d0bee3c54d161391ab99b2415932036bcd895b9f01f3f37b +size 28673513 diff --git a/decoder.layers.14.attn-head-sparsity-predictor.2.weight b/decoder.layers.14.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7e7f4afebbfea48453dec52a9e0b646e121975a2 Binary files /dev/null and b/decoder.layers.14.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.14.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.14.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..4b3bf3cfd359ac664424140f7c0d438058076b51 --- /dev/null +++ b/decoder.layers.14.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b846a2c0a0ae787023714224d4b3254538c3ea397d593607e960c77efc73d3c0 +size 28673508 diff --git a/decoder.layers.14.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.14.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..79465a8ffd2d22a27079586b11b728fb530fda71 --- /dev/null +++ b/decoder.layers.14.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:405c6d57eff57ae138e8e4d502a03f5c0f1ae8bf2a71dfea655820a98e1ea17b +size 114689508 diff --git a/decoder.layers.14.fc1.bias b/decoder.layers.14.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..00778ee6adbc2d80579feb65f38c69824e3f8e59 Binary files /dev/null and b/decoder.layers.14.fc1.bias differ diff --git a/decoder.layers.14.fc1.weight b/decoder.layers.14.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..ead0c6fe511be055cdc3d7587771a7c1538e456e --- /dev/null +++ b/decoder.layers.14.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057cc5401200962bf62fbe9e097d7442bb51a3d01a8fb4067926c31840b3c94a +size 411041792 diff --git a/decoder.layers.14.fc2.bias b/decoder.layers.14.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..a57b97e44cb38f06e0b506e04d42827c52e62ba2 Binary files /dev/null and b/decoder.layers.14.fc2.bias differ diff --git a/decoder.layers.14.fc2.weight b/decoder.layers.14.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..6149352e3e9dd7b2c531780a5c2a0e639730679a --- /dev/null +++ b/decoder.layers.14.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb25a90c18911368aca0d83f2a595d1bb387f1af20a97adc64bdf8ae8ee89e27 +size 411041792 diff --git a/decoder.layers.14.final_layer_norm.bias b/decoder.layers.14.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..12fb8aa6b0f3c38d70c6f0d7d8721a0c2e3cc8d3 Binary files /dev/null and b/decoder.layers.14.final_layer_norm.bias differ diff --git a/decoder.layers.14.final_layer_norm.weight b/decoder.layers.14.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.14.final_layer_norm.weight differ diff --git a/decoder.layers.14.self_attn.catted_head_biases b/decoder.layers.14.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..b004f4bf4bfc7ba1145f625e39a861c933b71c6f Binary files /dev/null and b/decoder.layers.14.self_attn.catted_head_biases differ diff --git a/decoder.layers.14.self_attn.catted_head_weights b/decoder.layers.14.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..0944e38ec4dcf1044a1f5391548ff600c2c88502 --- /dev/null +++ b/decoder.layers.14.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe97e7118af3aebddd096d34c83ec5b9300a1ebdd156a5d15c71eb1eb67fe48 +size 411041792 diff --git a/decoder.layers.14.self_attn.out_proj.bias b/decoder.layers.14.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..a18cbe14ed4035161e318d0ea6e9a0244b9178e9 Binary files /dev/null and b/decoder.layers.14.self_attn.out_proj.bias differ diff --git a/decoder.layers.14.self_attn_layer_norm.bias b/decoder.layers.14.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..04613c47331423c07cde7f33252f1ee6cb12d5ef Binary files /dev/null and b/decoder.layers.14.self_attn_layer_norm.bias differ diff --git a/decoder.layers.14.self_attn_layer_norm.weight b/decoder.layers.14.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.14.self_attn_layer_norm.weight differ diff --git a/decoder.layers.15.attn-head-sparsity-predictor.1.weight b/decoder.layers.15.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..879d3c4291771e32f4f8eaf0f68a778440de5439 --- /dev/null +++ b/decoder.layers.15.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:add177542355ec15d3ddc69631873185578d2e1186a7c4397a8b8053aae3d1d8 +size 28673513 diff --git a/decoder.layers.15.attn-head-sparsity-predictor.2.weight b/decoder.layers.15.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7028a48a7d1aa44fd18ac84180f81b0fdfce261e Binary files /dev/null and b/decoder.layers.15.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.15.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.15.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..bb9f24dc852cfef6e311e9ae0b7a95b1ec27d87c --- /dev/null +++ b/decoder.layers.15.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ddb6860a8d39e518ba51cf7361a25a8ad9d2d7d0c12366645beee026caec1b +size 28673508 diff --git a/decoder.layers.15.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.15.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..ba714c85420587ffb767bb8b5a72209bd0786fc0 --- /dev/null +++ b/decoder.layers.15.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f82815cce60b4c984abe473dd68ed26ced23593e772e87e8e52713570b744e93 +size 114689508 diff --git a/decoder.layers.15.fc1.bias b/decoder.layers.15.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..74a2445a256a803f0a2427b384b053c14b176f03 Binary files /dev/null and b/decoder.layers.15.fc1.bias differ diff --git a/decoder.layers.15.fc1.weight b/decoder.layers.15.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..c0427b17f38d9c85a99c37aacd77d670379b4d80 --- /dev/null +++ b/decoder.layers.15.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d3f8d78c5a58f93fc46c821e813994f7d26005040bfa3c5dda934e60368a4f +size 411041792 diff --git a/decoder.layers.15.fc2.bias b/decoder.layers.15.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..6a1997921dc08c969ff80ae893b6885f076dbff4 Binary files /dev/null and b/decoder.layers.15.fc2.bias differ diff --git a/decoder.layers.15.fc2.weight b/decoder.layers.15.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..0862502d400789f0c5d03a14099fb57ddf98ad98 --- /dev/null +++ b/decoder.layers.15.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68ebfad4fc72af83cfb7cc48ce77cabc97406ec5dfd7305b2bd82ebfdd0cce57 +size 411041792 diff --git a/decoder.layers.15.final_layer_norm.bias b/decoder.layers.15.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..07814e2e918dc386afba956409ed8cc8ffa2d0a7 Binary files /dev/null and b/decoder.layers.15.final_layer_norm.bias differ diff --git a/decoder.layers.15.final_layer_norm.weight b/decoder.layers.15.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.15.final_layer_norm.weight differ diff --git a/decoder.layers.15.self_attn.catted_head_biases b/decoder.layers.15.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..b41612a00f7e48c0988b077723558b2aedbedf6f Binary files /dev/null and b/decoder.layers.15.self_attn.catted_head_biases differ diff --git a/decoder.layers.15.self_attn.catted_head_weights b/decoder.layers.15.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..6562960a1bdea4eac7d0c7608f00fa0a928b88d9 --- /dev/null +++ b/decoder.layers.15.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:875592d86c4570e12f8865341b0125ec036e26c94c300efc2617d108c06d5857 +size 411041792 diff --git a/decoder.layers.15.self_attn.out_proj.bias b/decoder.layers.15.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..54768377b4fa7b1e8337857c0b35c8f2e825177e Binary files /dev/null and b/decoder.layers.15.self_attn.out_proj.bias differ diff --git a/decoder.layers.15.self_attn_layer_norm.bias b/decoder.layers.15.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..8a36d267b8aafb2b787fa8d5c284c72e17a3308b Binary files /dev/null and b/decoder.layers.15.self_attn_layer_norm.bias differ diff --git a/decoder.layers.15.self_attn_layer_norm.weight b/decoder.layers.15.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.15.self_attn_layer_norm.weight differ diff --git a/decoder.layers.16.attn-head-sparsity-predictor.1.weight b/decoder.layers.16.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..9bc3b66f96ce3340c286c98ae9f653ca7ca4b3d5 --- /dev/null +++ b/decoder.layers.16.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db24223789dbcd759df09aff9762f887a7b388ee1d82d1697125fda54af6226f +size 28673513 diff --git a/decoder.layers.16.attn-head-sparsity-predictor.2.weight b/decoder.layers.16.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..2f732f4fba61cdd6bc991caabac64fc630ea5c7d Binary files /dev/null and b/decoder.layers.16.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.16.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.16.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d6a31300e2c92df072465bb4f67ced0ceb2a3930 --- /dev/null +++ b/decoder.layers.16.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82fe5ab001ec7a982b8b100e867e3a394be793862512f77f5727d7ae7f67bb34 +size 28673508 diff --git a/decoder.layers.16.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.16.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9eddcce75b2c8ee745e3b43220020f6d014beb0b --- /dev/null +++ b/decoder.layers.16.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46cf53cdfbc59bbe2a576c0fef64ce2e47a72831b722d0f9147d94300735b8f3 +size 114689508 diff --git a/decoder.layers.16.fc1.bias b/decoder.layers.16.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..bed86e4485f996bd6101b1c6a4e25e4274f9b8c3 Binary files /dev/null and b/decoder.layers.16.fc1.bias differ diff --git a/decoder.layers.16.fc1.weight b/decoder.layers.16.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b576fa93db026ac1fb53ec083535401a9e3a96b4 --- /dev/null +++ b/decoder.layers.16.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa63b82eca617d95aa9e373a1da93a5317d7598eaf2f9128f74fdb3bcbc3b313 +size 411041792 diff --git a/decoder.layers.16.fc2.bias b/decoder.layers.16.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..47acabdcc44f49f383052f6fad6def77c50a91fe Binary files /dev/null and b/decoder.layers.16.fc2.bias differ diff --git a/decoder.layers.16.fc2.weight b/decoder.layers.16.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..d9b35f54f22664a558b35dd150e03dda71371ac9 --- /dev/null +++ b/decoder.layers.16.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a02d8a56bdc2ff0ac67c3bc8949ac13c606b00c75300507ce8775cca7ccd1d +size 411041792 diff --git a/decoder.layers.16.final_layer_norm.bias b/decoder.layers.16.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..dc373ecfb3ad0e84bf3aebbbe482fc542ea87e52 Binary files /dev/null and b/decoder.layers.16.final_layer_norm.bias differ diff --git a/decoder.layers.16.final_layer_norm.weight b/decoder.layers.16.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.16.final_layer_norm.weight differ diff --git a/decoder.layers.16.self_attn.catted_head_biases b/decoder.layers.16.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..51a6c9b93dd1f09d7f2e520bc463a583c23af322 Binary files /dev/null and b/decoder.layers.16.self_attn.catted_head_biases differ diff --git a/decoder.layers.16.self_attn.catted_head_weights b/decoder.layers.16.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..13a640b5b6b93c4c4a22304907f1b875e528aede --- /dev/null +++ b/decoder.layers.16.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea11d69c3116a5e2c8f88b1fa47a7a1afdf6f1fbf858002c19b1060b2ed8b796 +size 411041792 diff --git a/decoder.layers.16.self_attn.out_proj.bias b/decoder.layers.16.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..a386d5a44a57ec7653cdd2fe2e671caa42120dab Binary files /dev/null and b/decoder.layers.16.self_attn.out_proj.bias differ diff --git a/decoder.layers.16.self_attn_layer_norm.bias b/decoder.layers.16.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..0f9486f408bd22b3c8419c736e5394a0097416d8 Binary files /dev/null and b/decoder.layers.16.self_attn_layer_norm.bias differ diff --git a/decoder.layers.16.self_attn_layer_norm.weight b/decoder.layers.16.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.16.self_attn_layer_norm.weight differ diff --git a/decoder.layers.17.attn-head-sparsity-predictor.1.weight b/decoder.layers.17.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..5621d4222c0a077cdbfe08ccecbe7282b50044e1 --- /dev/null +++ b/decoder.layers.17.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d52d114812229ab19752a4e786d9d3d74acfc9c1e8eca3ced072d6b507e83f +size 28673513 diff --git a/decoder.layers.17.attn-head-sparsity-predictor.2.weight b/decoder.layers.17.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c2a06410145c3351a37635b5eaf28d103e1b525b Binary files /dev/null and b/decoder.layers.17.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.17.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.17.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..92a3253f70852b9d0a965781b8585e034d917b49 --- /dev/null +++ b/decoder.layers.17.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29d4cca8c6b8d2439a94ffe54832a43590c916c5fe6232467fe95314a99accb +size 28673508 diff --git a/decoder.layers.17.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.17.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..6efa53f0a496a7b0cc2f85d28dce74f7435f552a --- /dev/null +++ b/decoder.layers.17.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e60389eda1099128ee94626d5a262708d1832789681513835f02075fa509ad42 +size 114689508 diff --git a/decoder.layers.17.fc1.bias b/decoder.layers.17.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..a400ab65d41b831bf2be2d87f5f94fa9c6e8f27a Binary files /dev/null and b/decoder.layers.17.fc1.bias differ diff --git a/decoder.layers.17.fc1.weight b/decoder.layers.17.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..5a81b49dbe51f399f454045d99518926337a9978 --- /dev/null +++ b/decoder.layers.17.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04c9f6a40a9cd4526fac624df0a8acda53d553503f5f12cea4c66b5c5d5085db +size 411041792 diff --git a/decoder.layers.17.fc2.bias b/decoder.layers.17.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..3f509ded34e32d7e45a1fe6357b3d23659a0648d Binary files /dev/null and b/decoder.layers.17.fc2.bias differ diff --git a/decoder.layers.17.fc2.weight b/decoder.layers.17.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..889ad75a6a05ffb355fc9a8f665643bd744e6a55 --- /dev/null +++ b/decoder.layers.17.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa424c52d3657c514598226bf4d4e7cfd86830dc4681074cbec0f62a2eba7c16 +size 411041792 diff --git a/decoder.layers.17.final_layer_norm.bias b/decoder.layers.17.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..2f50ea1e2f01945e7b48947ad3ab42efdb07c117 Binary files /dev/null and b/decoder.layers.17.final_layer_norm.bias differ diff --git a/decoder.layers.17.final_layer_norm.weight b/decoder.layers.17.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.17.final_layer_norm.weight differ diff --git a/decoder.layers.17.self_attn.catted_head_biases b/decoder.layers.17.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..83499a7242543a1c6a8a0845a3788849362b4a49 Binary files /dev/null and b/decoder.layers.17.self_attn.catted_head_biases differ diff --git a/decoder.layers.17.self_attn.catted_head_weights b/decoder.layers.17.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..d04ad2db7ef8417beb527f53cc7a5d4e47779b04 --- /dev/null +++ b/decoder.layers.17.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3af9adfdeb6c74b2c35d42fa197ddd51d3c2869116995fef0dabfdc1455677e0 +size 411041792 diff --git a/decoder.layers.17.self_attn.out_proj.bias b/decoder.layers.17.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..dff160d0f8b0330e2e894f9533f694d992de833a Binary files /dev/null and b/decoder.layers.17.self_attn.out_proj.bias differ diff --git a/decoder.layers.17.self_attn_layer_norm.bias b/decoder.layers.17.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..e67768594c8dc4e7ec60396214da1878b5916ccf Binary files /dev/null and b/decoder.layers.17.self_attn_layer_norm.bias differ diff --git a/decoder.layers.17.self_attn_layer_norm.weight b/decoder.layers.17.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.17.self_attn_layer_norm.weight differ diff --git a/decoder.layers.18.attn-head-sparsity-predictor.1.weight b/decoder.layers.18.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..a1662707c31c222268b5ec424dda628c03b049e2 --- /dev/null +++ b/decoder.layers.18.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd3619dab2254c1b001477b2734a0c48dff4b859c3757266eb4e95831dea4bf +size 28673513 diff --git a/decoder.layers.18.attn-head-sparsity-predictor.2.weight b/decoder.layers.18.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..b822a5e31953f16623b8a7d425c3d8ea9f7837e8 Binary files /dev/null and b/decoder.layers.18.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.18.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.18.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..22813bb309596d31ae534dc4a1bdfc6df7870cad --- /dev/null +++ b/decoder.layers.18.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edbbe5382637edb3e5c10fe5b55280906433835056ac16b5b260410b57bef305 +size 28673508 diff --git a/decoder.layers.18.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.18.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c3770cc3914e73aed416356a321792f18e248ca3 --- /dev/null +++ b/decoder.layers.18.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434e5e3f453f9f7aff3491ae1bc584efe51fdb7fe71be8a8f708136cc1820916 +size 114689508 diff --git a/decoder.layers.18.fc1.bias b/decoder.layers.18.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..6039a07760e2d0fe34a84fc021380527cd244a2c Binary files /dev/null and b/decoder.layers.18.fc1.bias differ diff --git a/decoder.layers.18.fc1.weight b/decoder.layers.18.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..45ce4fecd3d20f52edfe30b3ba3ff32c5589fad8 --- /dev/null +++ b/decoder.layers.18.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02bf3913767ba0b11d0daba50b3cdec982a871c4a3479cb396ffa02261ca97ff +size 411041792 diff --git a/decoder.layers.18.fc2.bias b/decoder.layers.18.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..1fd5b9e7964560a9b7dd7cf1ba69977d11e42555 Binary files /dev/null and b/decoder.layers.18.fc2.bias differ diff --git a/decoder.layers.18.fc2.weight b/decoder.layers.18.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..e077c0ea283af64c54eb69d583807fa890e6894d --- /dev/null +++ b/decoder.layers.18.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:058e5dd2525b1bf67c7cfb28f500a574b9ffaa13afd5e52d410e9124cd897912 +size 411041792 diff --git a/decoder.layers.18.final_layer_norm.bias b/decoder.layers.18.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..539b7fe5819d8bb3e7ea1b4826839826069eb093 Binary files /dev/null and b/decoder.layers.18.final_layer_norm.bias differ diff --git a/decoder.layers.18.final_layer_norm.weight b/decoder.layers.18.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.18.final_layer_norm.weight differ diff --git a/decoder.layers.18.self_attn.catted_head_biases b/decoder.layers.18.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..ba3e13c5716d88c9b6449fdecd2404ab8c4135ac Binary files /dev/null and b/decoder.layers.18.self_attn.catted_head_biases differ diff --git a/decoder.layers.18.self_attn.catted_head_weights b/decoder.layers.18.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..6fbb6b953383ef69caef8954371f9db812e224e5 --- /dev/null +++ b/decoder.layers.18.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d45b2ce5fe99795bd7e4fae4524f602c6583ac7d10c721ccee012259166033e +size 411041792 diff --git a/decoder.layers.18.self_attn.out_proj.bias b/decoder.layers.18.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..2315d0f4f4b85f56af7e2fb6514f446194539b25 Binary files /dev/null and b/decoder.layers.18.self_attn.out_proj.bias differ diff --git a/decoder.layers.18.self_attn_layer_norm.bias b/decoder.layers.18.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..a191454882fff39b00d3b5458acaf8bd4e64d9e1 Binary files /dev/null and b/decoder.layers.18.self_attn_layer_norm.bias differ diff --git a/decoder.layers.18.self_attn_layer_norm.weight b/decoder.layers.18.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.18.self_attn_layer_norm.weight differ diff --git a/decoder.layers.19.attn-head-sparsity-predictor.1.weight b/decoder.layers.19.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..391ef39588f879cfd01237674417c879b3167464 --- /dev/null +++ b/decoder.layers.19.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56753c8cce3c879fd80a23118f2e36b4658926e0eaf49a2c107bc41ef4be5fb +size 28673513 diff --git a/decoder.layers.19.attn-head-sparsity-predictor.2.weight b/decoder.layers.19.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9c66990a5534d29695ab4453abd6f77885d8ab8d Binary files /dev/null and b/decoder.layers.19.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.19.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.19.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..dbcee80ddfe854c63593e5295f1ee947bf322e7a --- /dev/null +++ b/decoder.layers.19.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb353b9af44b48fd87ffe4ede8fef3cf98f162b9cb7cf1b4fdaeccf6e544e0bd +size 28673508 diff --git a/decoder.layers.19.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.19.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..60a1bf4a4efe732fb97f46b6ba251d083a521255 --- /dev/null +++ b/decoder.layers.19.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b5fbe6c0d20beb34fa4d5bfb26e608c164380e629852733d2dcf62b11cdd53 +size 114689508 diff --git a/decoder.layers.19.fc1.bias b/decoder.layers.19.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..c77add6e3a3fae883602e76f18ff9b4e4eb30a36 Binary files /dev/null and b/decoder.layers.19.fc1.bias differ diff --git a/decoder.layers.19.fc1.weight b/decoder.layers.19.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..f299e44218dfd04c4a206ea0c97a03d5c3266cae --- /dev/null +++ b/decoder.layers.19.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:286499dab1f138f00ff9282269279714baefc16b9b54f31fac67ba580eda7bf8 +size 411041792 diff --git a/decoder.layers.19.fc2.bias b/decoder.layers.19.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..9987406ba1b2fb50cd3048f89d4a88940519fdec Binary files /dev/null and b/decoder.layers.19.fc2.bias differ diff --git a/decoder.layers.19.fc2.weight b/decoder.layers.19.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9c72db0e018abd30ccc242869cd6d16e77227f0e --- /dev/null +++ b/decoder.layers.19.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9740e0aced16d4ff14880491dffc2b695d1ca7aad5ab52d3977074d451fe029d +size 411041792 diff --git a/decoder.layers.19.final_layer_norm.bias b/decoder.layers.19.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..1481efae0424ee122e82ddc5f06fbb0dce2c22fe Binary files /dev/null and b/decoder.layers.19.final_layer_norm.bias differ diff --git a/decoder.layers.19.final_layer_norm.weight b/decoder.layers.19.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.19.final_layer_norm.weight differ diff --git a/decoder.layers.19.self_attn.catted_head_biases b/decoder.layers.19.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..f06802878c57249e5247233c6ad29f56b111dd18 Binary files /dev/null and b/decoder.layers.19.self_attn.catted_head_biases differ diff --git a/decoder.layers.19.self_attn.catted_head_weights b/decoder.layers.19.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..d14af26439e18ff6f266f981ebaeed7b1d02a639 --- /dev/null +++ b/decoder.layers.19.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16d3dde9a4ee557fd1006c0de5723620465969f85caac178cd3c8033a01d3c1 +size 411041792 diff --git a/decoder.layers.19.self_attn.out_proj.bias b/decoder.layers.19.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..259c86b7ec1413997d04032e64a2c8dc54029e18 Binary files /dev/null and b/decoder.layers.19.self_attn.out_proj.bias differ diff --git a/decoder.layers.19.self_attn_layer_norm.bias b/decoder.layers.19.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..1b273f61cb8e51d37b748b4a0f2dbef02bbb3a72 Binary files /dev/null and b/decoder.layers.19.self_attn_layer_norm.bias differ diff --git a/decoder.layers.19.self_attn_layer_norm.weight b/decoder.layers.19.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.19.self_attn_layer_norm.weight differ diff --git a/decoder.layers.2.attn-head-sparsity-predictor.1.weight b/decoder.layers.2.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..056de1eb7648ea0b1292286dd41a9f46c26b7413 --- /dev/null +++ b/decoder.layers.2.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cd26a84eaa0d628f9aabbec10b4c87841fa4d8e81280405f2116cbf4868b44 +size 28673508 diff --git a/decoder.layers.2.attn-head-sparsity-predictor.2.weight b/decoder.layers.2.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..b35d2443de66fa5e72e4795d5b4b99c2d4082d52 Binary files /dev/null and b/decoder.layers.2.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.2.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.2.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..8592275a1e153e1741dacb53e0dd0950909bfe62 --- /dev/null +++ b/decoder.layers.2.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0c7e3eab698917f762dc96bcd4e8cf0108ba48f56562e4a2bd379b78eb2050 +size 28673503 diff --git a/decoder.layers.2.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.2.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..86489c8bcc0c5b261011b04a77e524ae13e2166f --- /dev/null +++ b/decoder.layers.2.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e67f13bf80c98b205bcb704c1c4a41598a7178ec24bea88beb6e9412feac901d +size 114689503 diff --git a/decoder.layers.2.fc1.bias b/decoder.layers.2.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..1dc93bcce1b44e1370183a04053adf9b084cb72c Binary files /dev/null and b/decoder.layers.2.fc1.bias differ diff --git a/decoder.layers.2.fc1.weight b/decoder.layers.2.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..7e8a555978c10237c33153aca1899cc2298b1560 --- /dev/null +++ b/decoder.layers.2.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c8772dbcce679de7dfcffb233225c13b4270e014adb672a2e6fc79a2e2e79d +size 411041792 diff --git a/decoder.layers.2.fc2.bias b/decoder.layers.2.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..26a2bfb903b2d73e6cdeb3f4c58a2f748bc84cf7 Binary files /dev/null and b/decoder.layers.2.fc2.bias differ diff --git a/decoder.layers.2.fc2.weight b/decoder.layers.2.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..200f917dff87b679590996a5a051fddf5e2fe1fb --- /dev/null +++ b/decoder.layers.2.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a31ac6da915f54f5df0f2ffd8952bb3250a98513b1eaf9f78079d8d23d1ed9 +size 411041792 diff --git a/decoder.layers.2.final_layer_norm.bias b/decoder.layers.2.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..7110afb1ddcd84908b6403fa0aa298faa7237c7c Binary files /dev/null and b/decoder.layers.2.final_layer_norm.bias differ diff --git a/decoder.layers.2.final_layer_norm.weight b/decoder.layers.2.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.2.final_layer_norm.weight differ diff --git a/decoder.layers.2.self_attn.catted_head_biases b/decoder.layers.2.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..8d8cef2a2ec1bea8d083374c0f5acf4befe08f61 Binary files /dev/null and b/decoder.layers.2.self_attn.catted_head_biases differ diff --git a/decoder.layers.2.self_attn.catted_head_weights b/decoder.layers.2.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..3b72e2ecd86722d79c50bf4d7ea09082a7118e13 --- /dev/null +++ b/decoder.layers.2.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b82946ea4ea7a87da9504dc091fe6c060636b4017c72a5de31c6fd4b71c054d +size 411041792 diff --git a/decoder.layers.2.self_attn.out_proj.bias b/decoder.layers.2.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..bbac68964f3be6a70f5c7eb9f744d0f404911b2a Binary files /dev/null and b/decoder.layers.2.self_attn.out_proj.bias differ diff --git a/decoder.layers.2.self_attn_layer_norm.bias b/decoder.layers.2.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..bb764ae4084ce56285be0246e6e03742c119898b Binary files /dev/null and b/decoder.layers.2.self_attn_layer_norm.bias differ diff --git a/decoder.layers.2.self_attn_layer_norm.weight b/decoder.layers.2.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.2.self_attn_layer_norm.weight differ diff --git a/decoder.layers.20.attn-head-sparsity-predictor.1.weight b/decoder.layers.20.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..2e61b469eef5a199c1ee7c4abf82418cad03973c --- /dev/null +++ b/decoder.layers.20.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221407f01b5998c745ac73d3b2b6dcd8adccd8202dd8431325d95eda8c426753 +size 28673513 diff --git a/decoder.layers.20.attn-head-sparsity-predictor.2.weight b/decoder.layers.20.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..f6cae01a9d33a97900ef5e80a1fdc2a508d8bf11 Binary files /dev/null and b/decoder.layers.20.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.20.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.20.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..e1ff1244e84ee204ec639efbf2ba69a39cc3e919 --- /dev/null +++ b/decoder.layers.20.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab14227ef413d01fe233787868d69b3a5d1a16f2bdaaa3fb341361a220e5e4c +size 28673508 diff --git a/decoder.layers.20.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.20.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7b277269c761d8d18038bb4e8fa1750df39c56c4 --- /dev/null +++ b/decoder.layers.20.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea339e4da42d58ae5aafbcd975f36647481ed017ba138135b88559df2220f5e +size 114689508 diff --git a/decoder.layers.20.fc1.bias b/decoder.layers.20.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..2b2b6d8af8dc83b724924fb075b01c49c8d4c024 Binary files /dev/null and b/decoder.layers.20.fc1.bias differ diff --git a/decoder.layers.20.fc1.weight b/decoder.layers.20.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..08e45d17c157a58ed64580d6576a1edd7120d2dc --- /dev/null +++ b/decoder.layers.20.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6689946a46d2ca0cd3ee8c87719fd90b57e2d53a3aaea5da2e21224fc1f9a6 +size 411041792 diff --git a/decoder.layers.20.fc2.bias b/decoder.layers.20.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..b8044d66345649b86ae7028adc5738a43442e341 Binary files /dev/null and b/decoder.layers.20.fc2.bias differ diff --git a/decoder.layers.20.fc2.weight b/decoder.layers.20.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..4d8cf958bce16e17649bdf74eb81b6b4a29f8084 --- /dev/null +++ b/decoder.layers.20.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849265e77d9c70720dd3eed83fbeb312ab5e2c8fc3d04e9237d954cda3fbed3a +size 411041792 diff --git a/decoder.layers.20.final_layer_norm.bias b/decoder.layers.20.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..19b46bc543dd89ca8d3328ac36b91531512d9c6b Binary files /dev/null and b/decoder.layers.20.final_layer_norm.bias differ diff --git a/decoder.layers.20.final_layer_norm.weight b/decoder.layers.20.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.20.final_layer_norm.weight differ diff --git a/decoder.layers.20.self_attn.catted_head_biases b/decoder.layers.20.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..31931a4fd12680e7ef2cc4a9f56f2035bc67cb57 Binary files /dev/null and b/decoder.layers.20.self_attn.catted_head_biases differ diff --git a/decoder.layers.20.self_attn.catted_head_weights b/decoder.layers.20.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..c9adb4c8b76b9203538d9ffe69e0bd4b00545fbe --- /dev/null +++ b/decoder.layers.20.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2e8d1c13aa45e9e112ecfba42dfac27140507e6bf3165bc7b65f4934c1a5df +size 411041792 diff --git a/decoder.layers.20.self_attn.out_proj.bias b/decoder.layers.20.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..3e7e7583868be72eddaff2e3d338e6ec6fb428c7 Binary files /dev/null and b/decoder.layers.20.self_attn.out_proj.bias differ diff --git a/decoder.layers.20.self_attn_layer_norm.bias b/decoder.layers.20.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..f5c47e3e0eb02fe5bcc7c095be1ddcdac4793dcc Binary files /dev/null and b/decoder.layers.20.self_attn_layer_norm.bias differ diff --git a/decoder.layers.20.self_attn_layer_norm.weight b/decoder.layers.20.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.20.self_attn_layer_norm.weight differ diff --git a/decoder.layers.21.attn-head-sparsity-predictor.1.weight b/decoder.layers.21.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..f9a5385d93983b16ccbe49031591c8d55b1d312f --- /dev/null +++ b/decoder.layers.21.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceefc4bea0c05e1e97084d5e484195dd5227aa5e6a88d635f84510353d9aabee +size 28673513 diff --git a/decoder.layers.21.attn-head-sparsity-predictor.2.weight b/decoder.layers.21.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..2d614d5407cae4a986e0c200092307c6da71817f Binary files /dev/null and b/decoder.layers.21.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.21.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.21.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..7f6629bb3cf7f6ae18f00f83c7ef4f804e137f71 --- /dev/null +++ b/decoder.layers.21.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ab18841114af1f7d85348ee9f3975538670dee49f55bf5bd99c7abb0e8bc78 +size 28673508 diff --git a/decoder.layers.21.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.21.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..25d4626b1a99e9ea62131203583e73f6992c5f2c --- /dev/null +++ b/decoder.layers.21.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56cd151ebafbcbb5cbb42e76087f602cf2774f8edcce1b57148404df4351e02a +size 114689508 diff --git a/decoder.layers.21.fc1.bias b/decoder.layers.21.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..3bfe095eda5551472e05273796d2e0680251f61e Binary files /dev/null and b/decoder.layers.21.fc1.bias differ diff --git a/decoder.layers.21.fc1.weight b/decoder.layers.21.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..9d51c4a77ff2820224a15a90147f4d541e05f4c4 --- /dev/null +++ b/decoder.layers.21.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9103b334bfc245dea09e2215a916eedd06e1617ca1978a3ca32e1d2c16588386 +size 411041792 diff --git a/decoder.layers.21.fc2.bias b/decoder.layers.21.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..fefd0417156c4e6ed32f9286baf1e1f4b13ac16e Binary files /dev/null and b/decoder.layers.21.fc2.bias differ diff --git a/decoder.layers.21.fc2.weight b/decoder.layers.21.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..1d7b5c79fb929d7e30bc32b288c62f84039220b3 --- /dev/null +++ b/decoder.layers.21.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5b1e176ab09ec5268135ab543f61e64c6eb5bfd43375e4079b8b15a6ec8cb7 +size 411041792 diff --git a/decoder.layers.21.final_layer_norm.bias b/decoder.layers.21.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..a90eb43b80a0691b19880b45de1d74f9bb2efdaa Binary files /dev/null and b/decoder.layers.21.final_layer_norm.bias differ diff --git a/decoder.layers.21.final_layer_norm.weight b/decoder.layers.21.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.21.final_layer_norm.weight differ diff --git a/decoder.layers.21.self_attn.catted_head_biases b/decoder.layers.21.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..e0a123fa98bfa02796f0f9a8518c2ee701f8cff5 Binary files /dev/null and b/decoder.layers.21.self_attn.catted_head_biases differ diff --git a/decoder.layers.21.self_attn.catted_head_weights b/decoder.layers.21.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..ec8c71347b3df3aa0f520c61048bef8dd81c0c74 --- /dev/null +++ b/decoder.layers.21.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e99c257eb69fd03dcce0098ab16d51e6bebe7337cdbd5b2234d03c7c8bf08ccd +size 411041792 diff --git a/decoder.layers.21.self_attn.out_proj.bias b/decoder.layers.21.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..e52702ab42b2226cd8999e749841421f76c7f373 Binary files /dev/null and b/decoder.layers.21.self_attn.out_proj.bias differ diff --git a/decoder.layers.21.self_attn_layer_norm.bias b/decoder.layers.21.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..4d8daa844402087bfdf78d802ab083db47b6cbee Binary files /dev/null and b/decoder.layers.21.self_attn_layer_norm.bias differ diff --git a/decoder.layers.21.self_attn_layer_norm.weight b/decoder.layers.21.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.21.self_attn_layer_norm.weight differ diff --git a/decoder.layers.22.attn-head-sparsity-predictor.1.weight b/decoder.layers.22.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..60e58d83936ae79ca6e587bc1d5dd65cca947790 --- /dev/null +++ b/decoder.layers.22.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bc4c45899f622846e789beed9a8af89d2f93c8178f9261072153fd53c5c9d8f +size 28673513 diff --git a/decoder.layers.22.attn-head-sparsity-predictor.2.weight b/decoder.layers.22.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..b6260a860e2a3a4a1d456a8b4ee5f1f7bfec3dd9 Binary files /dev/null and b/decoder.layers.22.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.22.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.22.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..2828da553e713223d66170163222f1a92ada793c --- /dev/null +++ b/decoder.layers.22.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925564f8b93d1a942043c14a20958419703992690d35871d88fb33517de85fdf +size 28673508 diff --git a/decoder.layers.22.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.22.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..bd71c1460839e634e7804773cb4701f1dfb702f4 --- /dev/null +++ b/decoder.layers.22.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5cb5aa9242544deca2952702ec02521e01299e7368a5ec2f1ec9bfc709a595 +size 114689508 diff --git a/decoder.layers.22.fc1.bias b/decoder.layers.22.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..8a01e626023ac3f5215b8e6999ecffc7fc7663a2 Binary files /dev/null and b/decoder.layers.22.fc1.bias differ diff --git a/decoder.layers.22.fc1.weight b/decoder.layers.22.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d70498f4aba9de59bb69ed4faa02434f8037e408 --- /dev/null +++ b/decoder.layers.22.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d573c7d0bf18039037a40d4166b5de81bd5240454953e7ea02950260f539ebb +size 411041792 diff --git a/decoder.layers.22.fc2.bias b/decoder.layers.22.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..d3f777bf02e92f5e467087da598178246eba01a5 Binary files /dev/null and b/decoder.layers.22.fc2.bias differ diff --git a/decoder.layers.22.fc2.weight b/decoder.layers.22.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c4dcf77d46bb5f8fbab5985393f7703d22a17882 --- /dev/null +++ b/decoder.layers.22.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8085f92759b1941c5ade3dd5d359add02dad0509fd3a8800ebc64ca73f9adb +size 411041792 diff --git a/decoder.layers.22.final_layer_norm.bias b/decoder.layers.22.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..0cb6c7b148e866a4a48f17f7fec3729594262fe5 Binary files /dev/null and b/decoder.layers.22.final_layer_norm.bias differ diff --git a/decoder.layers.22.final_layer_norm.weight b/decoder.layers.22.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.22.final_layer_norm.weight differ diff --git a/decoder.layers.22.self_attn.catted_head_biases b/decoder.layers.22.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..558b2796ebbcaa02b779e98f115751ba3d18933c Binary files /dev/null and b/decoder.layers.22.self_attn.catted_head_biases differ diff --git a/decoder.layers.22.self_attn.catted_head_weights b/decoder.layers.22.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..1465617cead618bac49bbb7803695c1e8d81e586 --- /dev/null +++ b/decoder.layers.22.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1da554734e830652bd0be3188eb7d8564df13afc138fe19294cb2be6d7069a +size 411041792 diff --git a/decoder.layers.22.self_attn.out_proj.bias b/decoder.layers.22.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..01abd0cf4a569331240e1fcc97f5edfbec71df3b Binary files /dev/null and b/decoder.layers.22.self_attn.out_proj.bias differ diff --git a/decoder.layers.22.self_attn_layer_norm.bias b/decoder.layers.22.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..cf23f2ed702c56495f5f3126b4202abc7991c8ab Binary files /dev/null and b/decoder.layers.22.self_attn_layer_norm.bias differ diff --git a/decoder.layers.22.self_attn_layer_norm.weight b/decoder.layers.22.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.22.self_attn_layer_norm.weight differ diff --git a/decoder.layers.23.attn-head-sparsity-predictor.1.weight b/decoder.layers.23.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..38a2f9db4d324adbfbf72565b69a73f4bfe7bec6 --- /dev/null +++ b/decoder.layers.23.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47df4ab01780c3cdefbc4f75fed63a6d89e6b058f6e1617c912d7e10db80c323 +size 28673513 diff --git a/decoder.layers.23.attn-head-sparsity-predictor.2.weight b/decoder.layers.23.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..5d5d48fc92269458fc16c4dc9eda196cf6102be6 Binary files /dev/null and b/decoder.layers.23.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.23.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.23.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..33bf61879824c76ba89c232e815dabc9a0f31a3e --- /dev/null +++ b/decoder.layers.23.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2aa107076bfb0fc7b8f7a1040ce17b22b5064ed2a62a4d9fa9148b23892e1d1 +size 28673508 diff --git a/decoder.layers.23.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.23.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..31df0f970238d12fc6f70918ea69c13decf6e00d --- /dev/null +++ b/decoder.layers.23.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b9cc23dd1688848b2c2dcb0834f805b12f56f612a010af00ea7a5d720ded3b0 +size 114689508 diff --git a/decoder.layers.23.fc1.bias b/decoder.layers.23.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..b3ae7ee364a637455de09a1046e1789971b6a514 Binary files /dev/null and b/decoder.layers.23.fc1.bias differ diff --git a/decoder.layers.23.fc1.weight b/decoder.layers.23.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..e7c9a694f09f3fd1b9431cea00252b6c417550ab --- /dev/null +++ b/decoder.layers.23.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:554e9d97185dac5912a8e620c8843c656fa1dfa193e8880a76a40ec016ebdfd0 +size 411041792 diff --git a/decoder.layers.23.fc2.bias b/decoder.layers.23.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..300907f88cfab7f27f90da4482c6097536f137bc Binary files /dev/null and b/decoder.layers.23.fc2.bias differ diff --git a/decoder.layers.23.fc2.weight b/decoder.layers.23.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9f3a3b07b8da8bafc091fdb33675ab7b87d69230 --- /dev/null +++ b/decoder.layers.23.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f3cb91d027bc00c3bfece52af63ac97b704b32e19dde3bfee21a0c1ffcf1001 +size 411041792 diff --git a/decoder.layers.23.final_layer_norm.bias b/decoder.layers.23.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..92c43108dd87b7a27428da4678ddb44e6420812e Binary files /dev/null and b/decoder.layers.23.final_layer_norm.bias differ diff --git a/decoder.layers.23.final_layer_norm.weight b/decoder.layers.23.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.23.final_layer_norm.weight differ diff --git a/decoder.layers.23.self_attn.catted_head_biases b/decoder.layers.23.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..89cc17a3b48cedd375bc944e46a5506706df43f1 Binary files /dev/null and b/decoder.layers.23.self_attn.catted_head_biases differ diff --git a/decoder.layers.23.self_attn.catted_head_weights b/decoder.layers.23.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..3b95bf7dc8d66a41907d88ff50a4035ddf61b46a --- /dev/null +++ b/decoder.layers.23.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b9c1c0bdc11ca8f7e3909961e476478417e9dc2ba1a9ef178fc6f64e74ebc9 +size 411041792 diff --git a/decoder.layers.23.self_attn.out_proj.bias b/decoder.layers.23.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..74b8501b010e5db2cdbbc62f4a3187065492cee3 Binary files /dev/null and b/decoder.layers.23.self_attn.out_proj.bias differ diff --git a/decoder.layers.23.self_attn_layer_norm.bias b/decoder.layers.23.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..c233a52d66490bbe9ddc718df2fa888b52097eac Binary files /dev/null and b/decoder.layers.23.self_attn_layer_norm.bias differ diff --git a/decoder.layers.23.self_attn_layer_norm.weight b/decoder.layers.23.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.23.self_attn_layer_norm.weight differ diff --git a/decoder.layers.24.attn-head-sparsity-predictor.1.weight b/decoder.layers.24.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..32ebf4ad544c4144ad8e8992b48477c6d08a0d0a --- /dev/null +++ b/decoder.layers.24.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559c34c7856a974d7e6ee11129b314ea904301e1962c81b38b8ad79dd89ea956 +size 28673513 diff --git a/decoder.layers.24.attn-head-sparsity-predictor.2.weight b/decoder.layers.24.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..954953804e99cdae469a60c22961dc0d98b3319e Binary files /dev/null and b/decoder.layers.24.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.24.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.24.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..c51b7f98e690edaf92e1c3e9ffe74503c9d14e79 --- /dev/null +++ b/decoder.layers.24.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56ce549baef35bf7114cd933f2786012e994bd42d866c6ddb51975718cd21414 +size 28673508 diff --git a/decoder.layers.24.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.24.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..d7a88fdfd98c9fab35d105b6d649ee32e4007b02 --- /dev/null +++ b/decoder.layers.24.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3eaec441699450f62f7ff5f260a44986797181334b327e1cf7faf926d0e574f +size 114689508 diff --git a/decoder.layers.24.fc1.bias b/decoder.layers.24.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..cf1253694ece93e52e546338112dea1f4a8c88c9 Binary files /dev/null and b/decoder.layers.24.fc1.bias differ diff --git a/decoder.layers.24.fc1.weight b/decoder.layers.24.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..49ae9e75e301e02b4bf6fce6f8195e5e4c0617d5 --- /dev/null +++ b/decoder.layers.24.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b20b533392c30d0ae87426b04635001736c5488e6e4de396de3b6f1d006277 +size 411041792 diff --git a/decoder.layers.24.fc2.bias b/decoder.layers.24.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..e736fdd7e04e856d4a7e59c07cb70bf312e120be Binary files /dev/null and b/decoder.layers.24.fc2.bias differ diff --git a/decoder.layers.24.fc2.weight b/decoder.layers.24.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..bbbc7ea554f0497c9e81e2a5c8d658ebe4dceb0f --- /dev/null +++ b/decoder.layers.24.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9194269bfa71b0f2f9ffb660dd9054ad432ad7339ac866b6579882d4e3a634 +size 411041792 diff --git a/decoder.layers.24.final_layer_norm.bias b/decoder.layers.24.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..5e14f136d19fdbf1736cfcf5d1997a5ab2cff0b8 Binary files /dev/null and b/decoder.layers.24.final_layer_norm.bias differ diff --git a/decoder.layers.24.final_layer_norm.weight b/decoder.layers.24.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.24.final_layer_norm.weight differ diff --git a/decoder.layers.24.self_attn.catted_head_biases b/decoder.layers.24.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..ef0ceb5742b2f32d1834c5fbc84fea8d7d278326 Binary files /dev/null and b/decoder.layers.24.self_attn.catted_head_biases differ diff --git a/decoder.layers.24.self_attn.catted_head_weights b/decoder.layers.24.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..3988a3fa3f2e78b7175ba4b70521d7eb542a610e --- /dev/null +++ b/decoder.layers.24.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a9c2fcc2aed07283e51799aea15f117bd59da36dfe7592c5cde0c83e54c5151 +size 411041792 diff --git a/decoder.layers.24.self_attn.out_proj.bias b/decoder.layers.24.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..a038c1ed97a7ca12b573b521be2ed613d1aa30e7 Binary files /dev/null and b/decoder.layers.24.self_attn.out_proj.bias differ diff --git a/decoder.layers.24.self_attn_layer_norm.bias b/decoder.layers.24.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..33729f188b8c86613b43108743e129d61c0b19fc Binary files /dev/null and b/decoder.layers.24.self_attn_layer_norm.bias differ diff --git a/decoder.layers.24.self_attn_layer_norm.weight b/decoder.layers.24.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.24.self_attn_layer_norm.weight differ diff --git a/decoder.layers.25.attn-head-sparsity-predictor.1.weight b/decoder.layers.25.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..52e3546ca71047db7ffa74183cb081ebf18fa5f2 --- /dev/null +++ b/decoder.layers.25.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e27f1fc55d9a3f2dd13d06779690ef56d0ed37e4f9f0d80f6a80f5a6a42a584 +size 28673513 diff --git a/decoder.layers.25.attn-head-sparsity-predictor.2.weight b/decoder.layers.25.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..3f24ec8c2fdb8aea3a7dd7f73889ea790897f362 Binary files /dev/null and b/decoder.layers.25.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.25.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.25.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..657c6e16644367ed3f49fda9d5bffd34e4cf4847 --- /dev/null +++ b/decoder.layers.25.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e7a92f0e263926e7ae88311c9231272fd8f0bd0b90e3365be2302b6efcc985 +size 28673508 diff --git a/decoder.layers.25.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.25.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..aabacaf3c4ddbe295b8808d486fe294c9bc623e9 --- /dev/null +++ b/decoder.layers.25.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f5c3a7c828f71fcc1d9eaa935c58a31f7d44e8f3cee1cb362294e08ba3d279 +size 114689508 diff --git a/decoder.layers.25.fc1.bias b/decoder.layers.25.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..937d7c66f861d4c3e9b127f598ced8ef120fd76d Binary files /dev/null and b/decoder.layers.25.fc1.bias differ diff --git a/decoder.layers.25.fc1.weight b/decoder.layers.25.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..19a58eb804b40b06e50e050ac1e4a291861254c2 --- /dev/null +++ b/decoder.layers.25.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56b0f15910342b4cbe137e428c3faceb28b66e75224d78c02387bf42ee201e8 +size 411041792 diff --git a/decoder.layers.25.fc2.bias b/decoder.layers.25.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..e8138ce01e3ac374d28e04d5442dce995f359bc0 Binary files /dev/null and b/decoder.layers.25.fc2.bias differ diff --git a/decoder.layers.25.fc2.weight b/decoder.layers.25.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c1a4cc8742ba2897c72e0be7b41cfe12ae04da0d --- /dev/null +++ b/decoder.layers.25.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0af90a9be6772c3a065124738cdef1a1e618e714bb2cb69d408191ff373a44 +size 411041792 diff --git a/decoder.layers.25.final_layer_norm.bias b/decoder.layers.25.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..f9e9ec78117161dbde4cad4fe19b56f8b966c2ea Binary files /dev/null and b/decoder.layers.25.final_layer_norm.bias differ diff --git a/decoder.layers.25.final_layer_norm.weight b/decoder.layers.25.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.25.final_layer_norm.weight differ diff --git a/decoder.layers.25.self_attn.catted_head_biases b/decoder.layers.25.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..d8323406293bd6d349e502d98c351df6d915681d Binary files /dev/null and b/decoder.layers.25.self_attn.catted_head_biases differ diff --git a/decoder.layers.25.self_attn.catted_head_weights b/decoder.layers.25.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..fb6e6a25f91020fe60253f606e9088ab491267a6 --- /dev/null +++ b/decoder.layers.25.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01dc9d3cc8c018d016c9404f363861a7118782f91181c68e9995b8952badb031 +size 411041792 diff --git a/decoder.layers.25.self_attn.out_proj.bias b/decoder.layers.25.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..ef18b2c3e09d31d4d6e0bf31fa5cf2dc31bda500 Binary files /dev/null and b/decoder.layers.25.self_attn.out_proj.bias differ diff --git a/decoder.layers.25.self_attn_layer_norm.bias b/decoder.layers.25.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..d60ce864890938d4425372dcc7d09826b75abcc7 Binary files /dev/null and b/decoder.layers.25.self_attn_layer_norm.bias differ diff --git a/decoder.layers.25.self_attn_layer_norm.weight b/decoder.layers.25.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.25.self_attn_layer_norm.weight differ diff --git a/decoder.layers.26.attn-head-sparsity-predictor.1.weight b/decoder.layers.26.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..f2dd5f315968cc43ee47f0a3afb94b46d6ab0faf --- /dev/null +++ b/decoder.layers.26.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32bbd64e1433d4d8e28d1048858c570ba9f77f9ec663ea52d7db9438d2b76dab +size 28673513 diff --git a/decoder.layers.26.attn-head-sparsity-predictor.2.weight b/decoder.layers.26.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..e26c54d29288c6988be9a4beac6c4403d3f66013 Binary files /dev/null and b/decoder.layers.26.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.26.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.26.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..372133d85f117ece4aaed34cc192472d7274e854 --- /dev/null +++ b/decoder.layers.26.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e5d99bfac1f7b9c36df45ef850b25369cd10b2d41841905aed43e38b8c229d0 +size 28673508 diff --git a/decoder.layers.26.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.26.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..2e7c87d87ed21830a2ea95a8e3021f960726ecd5 --- /dev/null +++ b/decoder.layers.26.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbf600acefb2d64867f59b139ac1c5b55f891ed5dcdeca52482d408241727459 +size 114689508 diff --git a/decoder.layers.26.fc1.bias b/decoder.layers.26.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..baef924aea0df44faf9e392614f8fdb04f21a95e Binary files /dev/null and b/decoder.layers.26.fc1.bias differ diff --git a/decoder.layers.26.fc1.weight b/decoder.layers.26.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..ff0a910b00978a178536d456e03fe79ef7da7dce --- /dev/null +++ b/decoder.layers.26.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7650536aac310a913123a537a01ae04f1fbcd51df7f17052bccbb2394cf61dd +size 411041792 diff --git a/decoder.layers.26.fc2.bias b/decoder.layers.26.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..ff054b9b9e4d0229cfaea49182a4210c8bbc9952 Binary files /dev/null and b/decoder.layers.26.fc2.bias differ diff --git a/decoder.layers.26.fc2.weight b/decoder.layers.26.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..1c61138e9260ebf06cf25b98841d33fd488ab1d5 --- /dev/null +++ b/decoder.layers.26.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f6682d5248756de0439877f24a976c8a2206ddec003d2cb08b6a3119f8a439 +size 411041792 diff --git a/decoder.layers.26.final_layer_norm.bias b/decoder.layers.26.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..5339a221fc96452e3dde60105cf2854bce6c66f5 Binary files /dev/null and b/decoder.layers.26.final_layer_norm.bias differ diff --git a/decoder.layers.26.final_layer_norm.weight b/decoder.layers.26.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.26.final_layer_norm.weight differ diff --git a/decoder.layers.26.self_attn.catted_head_biases b/decoder.layers.26.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..570e2f34fa95a066bbbef4231b6a389f8f43d380 Binary files /dev/null and b/decoder.layers.26.self_attn.catted_head_biases differ diff --git a/decoder.layers.26.self_attn.catted_head_weights b/decoder.layers.26.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..205b5f0be5a5d291dc36587eb9eb18e7b3f19958 --- /dev/null +++ b/decoder.layers.26.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f43bdbe9011cdd2bd6110784ed5953fe74e97fe00fe4d8a634692b1bf27743e +size 411041792 diff --git a/decoder.layers.26.self_attn.out_proj.bias b/decoder.layers.26.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..762fcf2d49fe74d0af0d83e66979213118436e29 Binary files /dev/null and b/decoder.layers.26.self_attn.out_proj.bias differ diff --git a/decoder.layers.26.self_attn_layer_norm.bias b/decoder.layers.26.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..7b5b2295640dd946bc3f25bb50e3ff1a09a07736 Binary files /dev/null and b/decoder.layers.26.self_attn_layer_norm.bias differ diff --git a/decoder.layers.26.self_attn_layer_norm.weight b/decoder.layers.26.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.26.self_attn_layer_norm.weight differ diff --git a/decoder.layers.27.attn-head-sparsity-predictor.1.weight b/decoder.layers.27.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..0f86d5a602308e4c140c5f7bc8e811bca113dd7e --- /dev/null +++ b/decoder.layers.27.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed319574c5ba182da08ed2ee4824a289273ee4b3721eb872fb515bfa8f992e5 +size 28673513 diff --git a/decoder.layers.27.attn-head-sparsity-predictor.2.weight b/decoder.layers.27.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..91d9eea1c7e4eb7183f3f0f67ee2e76ad4a6603c Binary files /dev/null and b/decoder.layers.27.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.27.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.27.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..2f6743443ddb1a81c1d7003f76857ac2ed36a37f --- /dev/null +++ b/decoder.layers.27.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0714a792b0e157bd3c4240f0d852f94ef6199942c66c527fd41e7d4a9c1b35cf +size 28673508 diff --git a/decoder.layers.27.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.27.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..2f29479192cd7ca79a8802a67403467769e33a9e --- /dev/null +++ b/decoder.layers.27.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34ca6ef06bee4c588b2c48625166ea1ab11ad12ec737159ae83bbc0d2c3ea17 +size 114689508 diff --git a/decoder.layers.27.fc1.bias b/decoder.layers.27.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..02c4f91caecc1cf9356aba824916108489d117c1 Binary files /dev/null and b/decoder.layers.27.fc1.bias differ diff --git a/decoder.layers.27.fc1.weight b/decoder.layers.27.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..1a5aa8de5dc7b801ba03f5dffb0f48d22a6a0e5f --- /dev/null +++ b/decoder.layers.27.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e96b4fd2b5c787dd702275f2275d6d0873a6dbac127fd1a683808812ef9c704 +size 411041792 diff --git a/decoder.layers.27.fc2.bias b/decoder.layers.27.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..f1c1489081c17d887a1c5eb1433d843355cfcfd4 Binary files /dev/null and b/decoder.layers.27.fc2.bias differ diff --git a/decoder.layers.27.fc2.weight b/decoder.layers.27.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..cb25c52fe03bced586a95497fee94de434cdef51 --- /dev/null +++ b/decoder.layers.27.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3d5a4bf66b0fddad107c892cf064b20343dd77a31c0073bb3e80c22d336bc78 +size 411041792 diff --git a/decoder.layers.27.final_layer_norm.bias b/decoder.layers.27.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..019b965072a2c0fb579c275b57f167d1e64fa62b Binary files /dev/null and b/decoder.layers.27.final_layer_norm.bias differ diff --git a/decoder.layers.27.final_layer_norm.weight b/decoder.layers.27.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.27.final_layer_norm.weight differ diff --git a/decoder.layers.27.self_attn.catted_head_biases b/decoder.layers.27.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..2e316fcf44db880115c0b447ece9fc3489b955cb Binary files /dev/null and b/decoder.layers.27.self_attn.catted_head_biases differ diff --git a/decoder.layers.27.self_attn.catted_head_weights b/decoder.layers.27.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..13a0cdd756351e9bc20ac540ce3d3388c07e2dd7 --- /dev/null +++ b/decoder.layers.27.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7dec19ebe0c46445b70d1703c3850c271e17ee915d3c295676b4bc2b2af4e9 +size 411041792 diff --git a/decoder.layers.27.self_attn.out_proj.bias b/decoder.layers.27.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..5feffa887d330b651892e03b86972de925cd7416 Binary files /dev/null and b/decoder.layers.27.self_attn.out_proj.bias differ diff --git a/decoder.layers.27.self_attn_layer_norm.bias b/decoder.layers.27.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..be469d87648f9b35ff43003fda96feb02e0df9fd Binary files /dev/null and b/decoder.layers.27.self_attn_layer_norm.bias differ diff --git a/decoder.layers.27.self_attn_layer_norm.weight b/decoder.layers.27.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.27.self_attn_layer_norm.weight differ diff --git a/decoder.layers.28.attn-head-sparsity-predictor.1.weight b/decoder.layers.28.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b2cf7fc96397403c0e2703eccbe0fa0eabc05357 --- /dev/null +++ b/decoder.layers.28.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc1f9b311b84225935d4f8b1280456bdf9bb331c80e3690c7474e2f87b2c74c +size 28673513 diff --git a/decoder.layers.28.attn-head-sparsity-predictor.2.weight b/decoder.layers.28.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..3f0ff8fc501db7a0f5108e7fc226732de07b47cc Binary files /dev/null and b/decoder.layers.28.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.28.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.28.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..455d17c4d36b3197f6fe0639f7d3c8320175de52 --- /dev/null +++ b/decoder.layers.28.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a809a967a942c92907efe845543c5692146237d257e98490fc7ef879a589df92 +size 28673508 diff --git a/decoder.layers.28.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.28.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..40740ab81e9bf1170371cdfa3910451217667e09 --- /dev/null +++ b/decoder.layers.28.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c170968ab8851370b9449e3cca0d4f61ae148d8f5fb7d27e53e012c90a1ce161 +size 114689508 diff --git a/decoder.layers.28.fc1.bias b/decoder.layers.28.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..971a62d8018402187965e24587e4174b61935158 Binary files /dev/null and b/decoder.layers.28.fc1.bias differ diff --git a/decoder.layers.28.fc1.weight b/decoder.layers.28.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..e686fe1c577d5d4c1fed94988cabf8a329c6fcc2 --- /dev/null +++ b/decoder.layers.28.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262fe142901a32d10b68024133703cde01e5f8519444f20aa445575980959219 +size 411041792 diff --git a/decoder.layers.28.fc2.bias b/decoder.layers.28.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..1a34ab2c6dde639ad75361f7069dbe11860c20d6 Binary files /dev/null and b/decoder.layers.28.fc2.bias differ diff --git a/decoder.layers.28.fc2.weight b/decoder.layers.28.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..8a09d9fd2793b29540bb3fbeaea25985053f14bf --- /dev/null +++ b/decoder.layers.28.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a45b474100c389b8c6a5b961a3dcf794fc26726b50645b05e2724b370a48c3 +size 411041792 diff --git a/decoder.layers.28.final_layer_norm.bias b/decoder.layers.28.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..f4164c2c709aa1edd55d5c4f32ee80b45492545d Binary files /dev/null and b/decoder.layers.28.final_layer_norm.bias differ diff --git a/decoder.layers.28.final_layer_norm.weight b/decoder.layers.28.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.28.final_layer_norm.weight differ diff --git a/decoder.layers.28.self_attn.catted_head_biases b/decoder.layers.28.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..32aac3b7db8894b844ecc6df583b6094d9216b58 Binary files /dev/null and b/decoder.layers.28.self_attn.catted_head_biases differ diff --git a/decoder.layers.28.self_attn.catted_head_weights b/decoder.layers.28.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..7073db7624bd6356cc497b9396ec9783e247be7c --- /dev/null +++ b/decoder.layers.28.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c4d94801dd6b48cfd7ff9e6406021339eb9cefcbdec3fadc107bdd75f3dcb9 +size 411041792 diff --git a/decoder.layers.28.self_attn.out_proj.bias b/decoder.layers.28.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..b277e987314ccd61608afb76fccdfbc584a2773a Binary files /dev/null and b/decoder.layers.28.self_attn.out_proj.bias differ diff --git a/decoder.layers.28.self_attn_layer_norm.bias b/decoder.layers.28.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..6c5d230e0f09d885e9621ca5f9d9a19b33220d44 Binary files /dev/null and b/decoder.layers.28.self_attn_layer_norm.bias differ diff --git a/decoder.layers.28.self_attn_layer_norm.weight b/decoder.layers.28.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.28.self_attn_layer_norm.weight differ diff --git a/decoder.layers.29.attn-head-sparsity-predictor.1.weight b/decoder.layers.29.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..ef82366db7afeabb0a1f9069e730721aac6f340c --- /dev/null +++ b/decoder.layers.29.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d05946054fe6ab3202b1ca7f7fe9a99ebe4a72c937b443ea472f8871490f691 +size 28673513 diff --git a/decoder.layers.29.attn-head-sparsity-predictor.2.weight b/decoder.layers.29.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..65cd39fe1c31b79d110e84987a64fbbebd182553 Binary files /dev/null and b/decoder.layers.29.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.29.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.29.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..501cacfb22f5ea8671aaa3a17ab000de363b6f47 --- /dev/null +++ b/decoder.layers.29.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30e20ed16faa378b2859d95a36aa01addd0231d1832487468ebe56704cc0bae +size 28673508 diff --git a/decoder.layers.29.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.29.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c2c2118c36642ac75eec6a2805794f823f82e7f6 --- /dev/null +++ b/decoder.layers.29.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5640f55a9d6766aaee1f87e55e21e5054f4cdfa75487a44f806e495a74af97 +size 114689508 diff --git a/decoder.layers.29.fc1.bias b/decoder.layers.29.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..3de736e3dec1d351501adc957e29c83f15e06062 Binary files /dev/null and b/decoder.layers.29.fc1.bias differ diff --git a/decoder.layers.29.fc1.weight b/decoder.layers.29.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..06db8b045ca02282df93cf43b5988618cd3b8579 --- /dev/null +++ b/decoder.layers.29.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9ba809ba8576340d45fac1c166109b1209f965b4f9263351acbe26a46a44189 +size 411041792 diff --git a/decoder.layers.29.fc2.bias b/decoder.layers.29.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..35c62f9e003c2f60bda98a4355bb3f8988355008 Binary files /dev/null and b/decoder.layers.29.fc2.bias differ diff --git a/decoder.layers.29.fc2.weight b/decoder.layers.29.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..1f8d4141256c4ea9e417e5a1ed38c8a391390a81 --- /dev/null +++ b/decoder.layers.29.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456670d295ce1d521c8ab3519013d04b6c88b876b4b8d50509e64d142e7a6374 +size 411041792 diff --git a/decoder.layers.29.final_layer_norm.bias b/decoder.layers.29.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..b4a9a7f960f7350cf7255ce0892f9a9c10d7c7de Binary files /dev/null and b/decoder.layers.29.final_layer_norm.bias differ diff --git a/decoder.layers.29.final_layer_norm.weight b/decoder.layers.29.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.29.final_layer_norm.weight differ diff --git a/decoder.layers.29.self_attn.catted_head_biases b/decoder.layers.29.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..ef6d310eb7ed3f50f7687ee6a6e97a26bfe39318 Binary files /dev/null and b/decoder.layers.29.self_attn.catted_head_biases differ diff --git a/decoder.layers.29.self_attn.catted_head_weights b/decoder.layers.29.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..c0b4073e78ed7aa68ad7418a488de482ab043474 --- /dev/null +++ b/decoder.layers.29.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b818d44e831e03098e8554b4a4f776bcd5b53e5fc7492c1e6374027c7fd822f1 +size 411041792 diff --git a/decoder.layers.29.self_attn.out_proj.bias b/decoder.layers.29.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..9a7958e4f9ebfc22ec2106852f24fcafce957861 Binary files /dev/null and b/decoder.layers.29.self_attn.out_proj.bias differ diff --git a/decoder.layers.29.self_attn_layer_norm.bias b/decoder.layers.29.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..a6c80e21a7b43203a8c1b6bca9943073dc504509 Binary files /dev/null and b/decoder.layers.29.self_attn_layer_norm.bias differ diff --git a/decoder.layers.29.self_attn_layer_norm.weight b/decoder.layers.29.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.29.self_attn_layer_norm.weight differ diff --git a/decoder.layers.3.attn-head-sparsity-predictor.1.weight b/decoder.layers.3.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..9fe37fcae8f6cd30cd2338459b6ad6622cf091e5 --- /dev/null +++ b/decoder.layers.3.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc74762542e6d94fddbc5a7b562619abe46d342a6d15c53965f2604ae02bd25b +size 28673508 diff --git a/decoder.layers.3.attn-head-sparsity-predictor.2.weight b/decoder.layers.3.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..5bed21de61017bc7c5555686d904a640e6e1cd6b Binary files /dev/null and b/decoder.layers.3.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.3.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.3.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b4dfd563a0bd804a982785aa2d174e9716f3ba45 --- /dev/null +++ b/decoder.layers.3.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8aa6fcac1ea76531e11e2d86ec3fdd1d456959d280602faf0ac524286aac2a1 +size 28673503 diff --git a/decoder.layers.3.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.3.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..22e47bb7eff7d53c9705363ce4b40bd0fcf2bd1f --- /dev/null +++ b/decoder.layers.3.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b4c9d70931ad1e69ebb7294f4138054353dbd9445b0df331dbceb6670055bac +size 114689503 diff --git a/decoder.layers.3.fc1.bias b/decoder.layers.3.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..673441c3856aa4e3427e620d4ad857dc1f3d65c3 Binary files /dev/null and b/decoder.layers.3.fc1.bias differ diff --git a/decoder.layers.3.fc1.weight b/decoder.layers.3.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..7140b935619716842a83ecd02d1b1b8cdb9689ae --- /dev/null +++ b/decoder.layers.3.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70e0f74709467f9a1e65565a9740ed9b51c3fa94f4e4aca3bbbbf1b5e87a556 +size 411041792 diff --git a/decoder.layers.3.fc2.bias b/decoder.layers.3.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..c949c4f4e0452ecbdb60154c697e94838ff2f0e5 Binary files /dev/null and b/decoder.layers.3.fc2.bias differ diff --git a/decoder.layers.3.fc2.weight b/decoder.layers.3.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..d475473bbb18cb2e6c7f92543dbab23612398d4a --- /dev/null +++ b/decoder.layers.3.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a78b44c8db361bbfa34839701e09ddc3a7335377ebccb18b505be39ef1073d8 +size 411041792 diff --git a/decoder.layers.3.final_layer_norm.bias b/decoder.layers.3.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..b52128eb13905540550db5b676e8024897bb35ed Binary files /dev/null and b/decoder.layers.3.final_layer_norm.bias differ diff --git a/decoder.layers.3.final_layer_norm.weight b/decoder.layers.3.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.3.final_layer_norm.weight differ diff --git a/decoder.layers.3.self_attn.catted_head_biases b/decoder.layers.3.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..4d3f472fa6624c43ab55abfd9d1dc58a18c70209 Binary files /dev/null and b/decoder.layers.3.self_attn.catted_head_biases differ diff --git a/decoder.layers.3.self_attn.catted_head_weights b/decoder.layers.3.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..376696781df0ac7d08d6338932fcf878af7ecdb8 --- /dev/null +++ b/decoder.layers.3.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48622237118c8f2b69b918ab89f29f074f8763a3309080d4394e7d335de6c7c3 +size 411041792 diff --git a/decoder.layers.3.self_attn.out_proj.bias b/decoder.layers.3.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..3dfb733b289598742e82b4dd9f95b4336f692f5f Binary files /dev/null and b/decoder.layers.3.self_attn.out_proj.bias differ diff --git a/decoder.layers.3.self_attn_layer_norm.bias b/decoder.layers.3.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..fd38d71459b968b1e48f730198863fd9c79eea26 Binary files /dev/null and b/decoder.layers.3.self_attn_layer_norm.bias differ diff --git a/decoder.layers.3.self_attn_layer_norm.weight b/decoder.layers.3.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.3.self_attn_layer_norm.weight differ diff --git a/decoder.layers.30.attn-head-sparsity-predictor.1.weight b/decoder.layers.30.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b910f8459c2c477441e5b6a9f4b419e64a7903a9 --- /dev/null +++ b/decoder.layers.30.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a102d65ce9bb5b2595850d9b63d0bd96da360efe2a454014445c905d06468e8 +size 28673513 diff --git a/decoder.layers.30.attn-head-sparsity-predictor.2.weight b/decoder.layers.30.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7c7b73c8bf74f699194abc3e4d6a32cb58de51b2 Binary files /dev/null and b/decoder.layers.30.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.30.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.30.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..c9d79a813fdcb56a4e715e542598170051cf6795 --- /dev/null +++ b/decoder.layers.30.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0b9921980322979372686c857fa9792f2e65f58433fbceb93013dda2ac272d +size 28673508 diff --git a/decoder.layers.30.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.30.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..b2d3e0a2dcdbf3a244d686edd4078d5ddb342522 --- /dev/null +++ b/decoder.layers.30.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce777cbf5daf5e83c5e3b4f081d81fcd3133375f1ff442c8fc08d7d32d0167e +size 114689508 diff --git a/decoder.layers.30.fc1.bias b/decoder.layers.30.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..41a5d5307b64c4465054eaf4153a44846426c397 Binary files /dev/null and b/decoder.layers.30.fc1.bias differ diff --git a/decoder.layers.30.fc1.weight b/decoder.layers.30.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..9dded8458f178ae00c57dbf035cba3f7c38097f8 --- /dev/null +++ b/decoder.layers.30.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0f4a322ca21b3c8ea4fc20999933eb9d1376c8b662df2255d44fc17a9d6420 +size 411041792 diff --git a/decoder.layers.30.fc2.bias b/decoder.layers.30.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..1dc20fbb5da7df53e56612fba0b2bf93410d4747 Binary files /dev/null and b/decoder.layers.30.fc2.bias differ diff --git a/decoder.layers.30.fc2.weight b/decoder.layers.30.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..68757261f4fe156365334e078a0e80924d28ea6b --- /dev/null +++ b/decoder.layers.30.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a076bee0eba170870552bfefdff881fea7aa8b4bc5a92a6224a752ed2c9b53fe +size 411041792 diff --git a/decoder.layers.30.final_layer_norm.bias b/decoder.layers.30.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..67067963da99be1b45e15ba2b50099382a94eafb Binary files /dev/null and b/decoder.layers.30.final_layer_norm.bias differ diff --git a/decoder.layers.30.final_layer_norm.weight b/decoder.layers.30.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.30.final_layer_norm.weight differ diff --git a/decoder.layers.30.self_attn.catted_head_biases b/decoder.layers.30.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..3329b7bef477efa9b010e7edd14f8ded7f88021e Binary files /dev/null and b/decoder.layers.30.self_attn.catted_head_biases differ diff --git a/decoder.layers.30.self_attn.catted_head_weights b/decoder.layers.30.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..83d732f3fc5ba991542e58edb96b0f7418db1686 --- /dev/null +++ b/decoder.layers.30.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:078ebf41f582a541d52a742542bf4ab5be3df745075a080af946ef8d93cfc087 +size 411041792 diff --git a/decoder.layers.30.self_attn.out_proj.bias b/decoder.layers.30.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..e354c0947a2e52ad53b2f2c0fd14cdef51553469 Binary files /dev/null and b/decoder.layers.30.self_attn.out_proj.bias differ diff --git a/decoder.layers.30.self_attn_layer_norm.bias b/decoder.layers.30.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..fbf312e5b7505e13f471e640c884468eab6c24d9 Binary files /dev/null and b/decoder.layers.30.self_attn_layer_norm.bias differ diff --git a/decoder.layers.30.self_attn_layer_norm.weight b/decoder.layers.30.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.30.self_attn_layer_norm.weight differ diff --git a/decoder.layers.31.attn-head-sparsity-predictor.1.weight b/decoder.layers.31.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..a21e3fad8b83950deaf936b410eb33732730289d --- /dev/null +++ b/decoder.layers.31.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cc9c9fd7d08c69abc66d7d88b12c9ab3dbe07eabf1b25116662a59182e95d43 +size 28673513 diff --git a/decoder.layers.31.attn-head-sparsity-predictor.2.weight b/decoder.layers.31.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..8624736aa9256c2f3bdd850d3467c5cefa666887 Binary files /dev/null and b/decoder.layers.31.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.31.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.31.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..ba092456d237cf35a4abbacba1eba59af9839a39 --- /dev/null +++ b/decoder.layers.31.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cedc9ef2e24262750c341f11dfa4cfa910733bc59339b0e177778e1238f0861d +size 28673508 diff --git a/decoder.layers.31.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.31.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..e8f9e96f6198e22ebf415433e5052fdd393071e3 --- /dev/null +++ b/decoder.layers.31.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457c1d61a5a2c5f83164201f6878708b038d137ed2ca1ffe3f6c903c7cc2a1f2 +size 114689508 diff --git a/decoder.layers.31.fc1.bias b/decoder.layers.31.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..b8a9ec5f185faa6187f63312dd03b775f10b62fd Binary files /dev/null and b/decoder.layers.31.fc1.bias differ diff --git a/decoder.layers.31.fc1.weight b/decoder.layers.31.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..83e42d9fa18982ef11d01c61a57d55cec7337c1f --- /dev/null +++ b/decoder.layers.31.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ff2f63f95042f77aba834f9486a2f3af078f08b0be58a81cc344477dd8cf9c +size 411041792 diff --git a/decoder.layers.31.fc2.bias b/decoder.layers.31.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..901df61136d788e83dd805a00be13f5bd92f3fe6 Binary files /dev/null and b/decoder.layers.31.fc2.bias differ diff --git a/decoder.layers.31.fc2.weight b/decoder.layers.31.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..038d6567dd99e05479042092ccefbc96975b7699 --- /dev/null +++ b/decoder.layers.31.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a4e0a09f67dff68c22a381928570c5afc458a5a66965496f61c63bf1a538e2 +size 411041792 diff --git a/decoder.layers.31.final_layer_norm.bias b/decoder.layers.31.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..2023142ea718c76047af03e4b3c13d06b47568f6 Binary files /dev/null and b/decoder.layers.31.final_layer_norm.bias differ diff --git a/decoder.layers.31.final_layer_norm.weight b/decoder.layers.31.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.31.final_layer_norm.weight differ diff --git a/decoder.layers.31.self_attn.catted_head_biases b/decoder.layers.31.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..fd39a9ce52b22f61b748bee65f2d141e83d97f0a Binary files /dev/null and b/decoder.layers.31.self_attn.catted_head_biases differ diff --git a/decoder.layers.31.self_attn.catted_head_weights b/decoder.layers.31.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..7579f70425a6251dbaf492d3b658055aee61441e --- /dev/null +++ b/decoder.layers.31.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae9eeb516f8cb9994045367dd63a5a979e05d739a89b0bff6bbd049561aaca6 +size 411041792 diff --git a/decoder.layers.31.self_attn.out_proj.bias b/decoder.layers.31.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..f340c89c5ef4110ed6e6b1090216a03e71b5fc28 Binary files /dev/null and b/decoder.layers.31.self_attn.out_proj.bias differ diff --git a/decoder.layers.31.self_attn_layer_norm.bias b/decoder.layers.31.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..a549c31df250774b8972601f3bf3ff01435f7b6c Binary files /dev/null and b/decoder.layers.31.self_attn_layer_norm.bias differ diff --git a/decoder.layers.31.self_attn_layer_norm.weight b/decoder.layers.31.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.31.self_attn_layer_norm.weight differ diff --git a/decoder.layers.32.attn-head-sparsity-predictor.1.weight b/decoder.layers.32.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..629669b985d914c6c3cf76b2a8d44087afdc0255 --- /dev/null +++ b/decoder.layers.32.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c487776c0570ffbc6448043f7d4972dc106ea15f569d73d3567d03eecb8cce +size 28673513 diff --git a/decoder.layers.32.attn-head-sparsity-predictor.2.weight b/decoder.layers.32.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..e4f6745bbe5ca1846ccdfcae171198cb05f623bd Binary files /dev/null and b/decoder.layers.32.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.32.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.32.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..904d6dbf1b45ad21a7c035c12d54d6fc73d03f08 --- /dev/null +++ b/decoder.layers.32.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f200f0e2066362dd89c8c55063e689921390fe0d917d2774b300b9c26cd321 +size 28673508 diff --git a/decoder.layers.32.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.32.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..e8952c5812f88898953f8c06e7cd3166496f7001 --- /dev/null +++ b/decoder.layers.32.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ef2d43fbaae8adc3ad959d004d580b9ab5801b9f578fbf2cf6cc57c51e6179 +size 114689508 diff --git a/decoder.layers.32.fc1.bias b/decoder.layers.32.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..d1c8f278160ac2899c6d73ff21e2465eb44ce264 Binary files /dev/null and b/decoder.layers.32.fc1.bias differ diff --git a/decoder.layers.32.fc1.weight b/decoder.layers.32.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..62906bbdae0ce52be4958e6334ea48c6f7c25036 --- /dev/null +++ b/decoder.layers.32.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c799357577b572a625d4ea3c9db89bd9403edb642067c97d5c36c9fdc59c88d +size 411041792 diff --git a/decoder.layers.32.fc2.bias b/decoder.layers.32.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..cce3664dc5902292efec7cb384a70757d1ee3a54 Binary files /dev/null and b/decoder.layers.32.fc2.bias differ diff --git a/decoder.layers.32.fc2.weight b/decoder.layers.32.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..1b6c0da50bc878fb07341fd39c3f11c9ebc58fc0 --- /dev/null +++ b/decoder.layers.32.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6afed4311eac81ce4f7aa6a8c5ce088451ede1293c445bdfc258b91a6078e9ed +size 411041792 diff --git a/decoder.layers.32.final_layer_norm.bias b/decoder.layers.32.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..f35a9cb4673a287e0f0402e8e295bd0ab12cac70 Binary files /dev/null and b/decoder.layers.32.final_layer_norm.bias differ diff --git a/decoder.layers.32.final_layer_norm.weight b/decoder.layers.32.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.32.final_layer_norm.weight differ diff --git a/decoder.layers.32.self_attn.catted_head_biases b/decoder.layers.32.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..7f3525533a84cf6ad78f36e344cf0514f708b9dd Binary files /dev/null and b/decoder.layers.32.self_attn.catted_head_biases differ diff --git a/decoder.layers.32.self_attn.catted_head_weights b/decoder.layers.32.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..78ab089001ad2f1f4139eb5d3ca0b909bcb6d8a4 --- /dev/null +++ b/decoder.layers.32.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d48261d5045ee43b70bf016eb533dca4cf6aa2504d25f7c88d484aa019da2ede +size 411041792 diff --git a/decoder.layers.32.self_attn.out_proj.bias b/decoder.layers.32.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..3ded008727d2162384682e5800e3f0a4908e4989 Binary files /dev/null and b/decoder.layers.32.self_attn.out_proj.bias differ diff --git a/decoder.layers.32.self_attn_layer_norm.bias b/decoder.layers.32.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..e8229425d0b5e8113c555cda010a5eb013707080 Binary files /dev/null and b/decoder.layers.32.self_attn_layer_norm.bias differ diff --git a/decoder.layers.32.self_attn_layer_norm.weight b/decoder.layers.32.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.32.self_attn_layer_norm.weight differ diff --git a/decoder.layers.33.attn-head-sparsity-predictor.1.weight b/decoder.layers.33.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b2393c1d7aed3dc456976e51e04ffed439ff0636 --- /dev/null +++ b/decoder.layers.33.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5acd5eebbae74bbe71ce9e7525d17deada63ab91e93df5334be53dc1099aa85 +size 28673513 diff --git a/decoder.layers.33.attn-head-sparsity-predictor.2.weight b/decoder.layers.33.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..ec8f3a56ead4941fd7c2d7c4cf2172c02e4099e7 Binary files /dev/null and b/decoder.layers.33.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.33.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.33.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..45aa6faa58a9c822e5feaf900b2b242b3c2c196e --- /dev/null +++ b/decoder.layers.33.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e27beb0d302fc1e6c83689c320463ff4314e02449136c9708f9a855d3533782 +size 28673508 diff --git a/decoder.layers.33.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.33.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c1d2934c9983e39404a00f35ee8e73a3ee4e5d0a --- /dev/null +++ b/decoder.layers.33.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c62d7357cb714a4ac92084ded541fa71bf21cb7356fa0609429e8140785dcc +size 114689508 diff --git a/decoder.layers.33.fc1.bias b/decoder.layers.33.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..526ca939b03d979d4cc6623f6ec8f083a15cc591 Binary files /dev/null and b/decoder.layers.33.fc1.bias differ diff --git a/decoder.layers.33.fc1.weight b/decoder.layers.33.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..ac54d8a35696fafb93c0b95fb3e58b3d61318a83 --- /dev/null +++ b/decoder.layers.33.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efcc46bc26c1745ba7d40884df0174302ab5598dcb066806f4eafe47c3ec4e68 +size 411041792 diff --git a/decoder.layers.33.fc2.bias b/decoder.layers.33.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..5783d53abb0f846cf1d1f3aa237f0698b3d49199 Binary files /dev/null and b/decoder.layers.33.fc2.bias differ diff --git a/decoder.layers.33.fc2.weight b/decoder.layers.33.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..12692377aaae33471325f18e4b73f9be4478d1d2 --- /dev/null +++ b/decoder.layers.33.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292d0ff24043c0a531d2ebe0142f915d8a509e6953203cc685ddbb034715a3d1 +size 411041792 diff --git a/decoder.layers.33.final_layer_norm.bias b/decoder.layers.33.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..f9f96fcf9c4aee9ba5666eafc0ebbbb461ae0846 Binary files /dev/null and b/decoder.layers.33.final_layer_norm.bias differ diff --git a/decoder.layers.33.final_layer_norm.weight b/decoder.layers.33.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.33.final_layer_norm.weight differ diff --git a/decoder.layers.33.self_attn.catted_head_biases b/decoder.layers.33.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..c7bb3018f2973f5267fbc9af435cfc98aeabbeba Binary files /dev/null and b/decoder.layers.33.self_attn.catted_head_biases differ diff --git a/decoder.layers.33.self_attn.catted_head_weights b/decoder.layers.33.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..ddc2b9d292460707979c60ac9d730874be0b4f78 --- /dev/null +++ b/decoder.layers.33.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:648283185d0258a9038c3dee88e2af4b81bb2bcdd62632d0d144d114c011fd6e +size 411041792 diff --git a/decoder.layers.33.self_attn.out_proj.bias b/decoder.layers.33.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..fd288e582a1e91b3bc9ba62831699bcb3e1edfda Binary files /dev/null and b/decoder.layers.33.self_attn.out_proj.bias differ diff --git a/decoder.layers.33.self_attn_layer_norm.bias b/decoder.layers.33.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..27e26a8cc64a08c883075009f259a6fe13fbd540 Binary files /dev/null and b/decoder.layers.33.self_attn_layer_norm.bias differ diff --git a/decoder.layers.33.self_attn_layer_norm.weight b/decoder.layers.33.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.33.self_attn_layer_norm.weight differ diff --git a/decoder.layers.34.attn-head-sparsity-predictor.1.weight b/decoder.layers.34.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..a9f97a76548ba77238425350491927398fc30362 --- /dev/null +++ b/decoder.layers.34.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e027015d0751f9a950d7fb0cbd74f92d4e571ab00307127a2722434e5abb107 +size 28673513 diff --git a/decoder.layers.34.attn-head-sparsity-predictor.2.weight b/decoder.layers.34.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9495279cef6d7d4c941d82b9355b9aab6177e408 Binary files /dev/null and b/decoder.layers.34.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.34.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.34.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..21b4860eab7721e15bbb6c7f5de7e1c639c55316 --- /dev/null +++ b/decoder.layers.34.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:531928a430b731269f47030ea3483de1b46453b9324103be43fedf7be51e242b +size 28673508 diff --git a/decoder.layers.34.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.34.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..18a4c1df75b256dbbf69aca938a2ff97e8d9b41c --- /dev/null +++ b/decoder.layers.34.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a0e9108494848c454ff58bc5ab77ffbb540ae4bfbf01ae87d6ee6a1ab26da2f +size 114689508 diff --git a/decoder.layers.34.fc1.bias b/decoder.layers.34.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..dec5f64f2c9356f7c2580a9b2bde7d69b7b51a80 Binary files /dev/null and b/decoder.layers.34.fc1.bias differ diff --git a/decoder.layers.34.fc1.weight b/decoder.layers.34.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..38e50fee9d1647149034a8acc18d0533e645212b --- /dev/null +++ b/decoder.layers.34.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b080843787747ffa83a223325a6a961911474c575637a43217f50e13b95eefdb +size 411041792 diff --git a/decoder.layers.34.fc2.bias b/decoder.layers.34.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..982cd183424074c5e07370d85a42fbe1ebd5a0be Binary files /dev/null and b/decoder.layers.34.fc2.bias differ diff --git a/decoder.layers.34.fc2.weight b/decoder.layers.34.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..a2f333b979029c0117232894e49257583090aa4a --- /dev/null +++ b/decoder.layers.34.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:009945c8fe47a8ea5596274951cd22e63e68a0b60eac1a6e43b407b5ebe92cbf +size 411041792 diff --git a/decoder.layers.34.final_layer_norm.bias b/decoder.layers.34.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..cde04c63822186dadb40358fb2fa839c54734db6 Binary files /dev/null and b/decoder.layers.34.final_layer_norm.bias differ diff --git a/decoder.layers.34.final_layer_norm.weight b/decoder.layers.34.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.34.final_layer_norm.weight differ diff --git a/decoder.layers.34.self_attn.catted_head_biases b/decoder.layers.34.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..8cd68dae47c33b8de92843d5fb23f3ebf814081a Binary files /dev/null and b/decoder.layers.34.self_attn.catted_head_biases differ diff --git a/decoder.layers.34.self_attn.catted_head_weights b/decoder.layers.34.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..e3a48be3317b40dbacb981ad0e307bbe41704e3b --- /dev/null +++ b/decoder.layers.34.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7264e9db4b9d0e7f3810f6b44f92eb21af06342b262278670e39aa17e14fb259 +size 411041792 diff --git a/decoder.layers.34.self_attn.out_proj.bias b/decoder.layers.34.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..545d7bc4bc748c75fc04683a6b32c8b39052b4bd Binary files /dev/null and b/decoder.layers.34.self_attn.out_proj.bias differ diff --git a/decoder.layers.34.self_attn_layer_norm.bias b/decoder.layers.34.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..abf1cd29174342b672c01a5738a5cd68c8b0c0de Binary files /dev/null and b/decoder.layers.34.self_attn_layer_norm.bias differ diff --git a/decoder.layers.34.self_attn_layer_norm.weight b/decoder.layers.34.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.34.self_attn_layer_norm.weight differ diff --git a/decoder.layers.35.attn-head-sparsity-predictor.1.weight b/decoder.layers.35.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b2ae5cb5ced07661f4f10f77b79773f1e6cbf0d4 --- /dev/null +++ b/decoder.layers.35.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd33b6aadba3abdd82e7ed1137084f65df047d0476cf4bde2793443cf2e5aab +size 28673513 diff --git a/decoder.layers.35.attn-head-sparsity-predictor.2.weight b/decoder.layers.35.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..8e05470a7beac07b024ec309a467234abfea0bfb Binary files /dev/null and b/decoder.layers.35.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.35.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.35.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..7d053ec6c712a2b3299f4c2a7312840c3939205f --- /dev/null +++ b/decoder.layers.35.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d97b79f09f940fb4466a10d3e87d434e82d17839f8cb638299824f27c434503 +size 28673508 diff --git a/decoder.layers.35.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.35.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..2ca2a94bbd92d5dd1c5d0d6e5078677286c5ff52 --- /dev/null +++ b/decoder.layers.35.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0745a7e2592a2e6f4a420698c3e3632c6d059722ba167c4eb02dc252d2a1d6 +size 114689508 diff --git a/decoder.layers.35.fc1.bias b/decoder.layers.35.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..6b7dc2e85ef4ee884aa72f14823ea9eab4f93e3a Binary files /dev/null and b/decoder.layers.35.fc1.bias differ diff --git a/decoder.layers.35.fc1.weight b/decoder.layers.35.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..307df75b0b0c07e5aacf232fe49de9f5c52a099d --- /dev/null +++ b/decoder.layers.35.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31bb5930e14fda9e47da01edf709059426253cb7680ee7234eee49b44e6fb73f +size 411041792 diff --git a/decoder.layers.35.fc2.bias b/decoder.layers.35.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..62e97e61594886ecf2bd72e8e12a6465c66bb07e Binary files /dev/null and b/decoder.layers.35.fc2.bias differ diff --git a/decoder.layers.35.fc2.weight b/decoder.layers.35.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..b0068e824b74cd1b78a4d43a6a29c2c7f6bc4a37 --- /dev/null +++ b/decoder.layers.35.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc24e0ee26a1bba84384b1202f766c8cc735f24fdc62a917cfaf181de9c14ba +size 411041792 diff --git a/decoder.layers.35.final_layer_norm.bias b/decoder.layers.35.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..b4239d11767799a94a6988ce2eb0af2dc98f4cf1 Binary files /dev/null and b/decoder.layers.35.final_layer_norm.bias differ diff --git a/decoder.layers.35.final_layer_norm.weight b/decoder.layers.35.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.35.final_layer_norm.weight differ diff --git a/decoder.layers.35.self_attn.catted_head_biases b/decoder.layers.35.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..c1c30878ed873fd3cdceb57433fdb955cfeac712 Binary files /dev/null and b/decoder.layers.35.self_attn.catted_head_biases differ diff --git a/decoder.layers.35.self_attn.catted_head_weights b/decoder.layers.35.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..2288cd261c455868ccabe5261408e62f19e6e685 --- /dev/null +++ b/decoder.layers.35.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5dc0da2f78e87b51725b2c80580255255ed2f597a99977709da20dfb28a3e61 +size 411041792 diff --git a/decoder.layers.35.self_attn.out_proj.bias b/decoder.layers.35.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..210ec5cb98a176eddc50f4b91458411fe007d9da Binary files /dev/null and b/decoder.layers.35.self_attn.out_proj.bias differ diff --git a/decoder.layers.35.self_attn_layer_norm.bias b/decoder.layers.35.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..e56dd48c9d0c7c1977aa37a8dc2cc6d3ec7e48ab Binary files /dev/null and b/decoder.layers.35.self_attn_layer_norm.bias differ diff --git a/decoder.layers.35.self_attn_layer_norm.weight b/decoder.layers.35.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.35.self_attn_layer_norm.weight differ diff --git a/decoder.layers.36.attn-head-sparsity-predictor.1.weight b/decoder.layers.36.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..305375ceafc8254035bf6eae9aa792fe65bfde0f --- /dev/null +++ b/decoder.layers.36.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91613eac342b8f7012d108488cbbe4409685c6f430af059d74e19d88d4f0d85d +size 28673513 diff --git a/decoder.layers.36.attn-head-sparsity-predictor.2.weight b/decoder.layers.36.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..fe1dcc230075ba1fdc1ea4b9d10f83ef920708c6 Binary files /dev/null and b/decoder.layers.36.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.36.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.36.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b36d028552d3a7329c2c4a1cb63a69b0459a4601 --- /dev/null +++ b/decoder.layers.36.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:789bb76ab1421a8fdaec54d21e5b8af3a9c5094d764e950c3ebcc1916c7f56c2 +size 28673508 diff --git a/decoder.layers.36.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.36.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..d63314414f9f47b583b52b3501622191a7b617a6 --- /dev/null +++ b/decoder.layers.36.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6282f21bb0812c0026a2a96d6a92fb7c3eb69f84277a927828b5de831329c10 +size 114689508 diff --git a/decoder.layers.36.fc1.bias b/decoder.layers.36.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..18e71d2916b4ae82615b05dfeef27cfbce27d1e0 Binary files /dev/null and b/decoder.layers.36.fc1.bias differ diff --git a/decoder.layers.36.fc1.weight b/decoder.layers.36.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d4ed9b72a96562517531bf1f00f4a08a5928955c --- /dev/null +++ b/decoder.layers.36.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29bbb4a145f50bcd0004a9a9cd09df10dc1151a4804c94203ede3852e1a2654 +size 411041792 diff --git a/decoder.layers.36.fc2.bias b/decoder.layers.36.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..f8026f9e7635df0c5854b2df9698b3233432537e Binary files /dev/null and b/decoder.layers.36.fc2.bias differ diff --git a/decoder.layers.36.fc2.weight b/decoder.layers.36.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..c809aef44f21376fadfff7127088fc528bcdb98a --- /dev/null +++ b/decoder.layers.36.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a91f2cf2481067b5d1a8d85bbabee7bb47e4f6ec342a3b2ae8469bd96e6e2f +size 411041792 diff --git a/decoder.layers.36.final_layer_norm.bias b/decoder.layers.36.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..38d40d42f37293982f8ca10256a7c91f773149f9 Binary files /dev/null and b/decoder.layers.36.final_layer_norm.bias differ diff --git a/decoder.layers.36.final_layer_norm.weight b/decoder.layers.36.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.36.final_layer_norm.weight differ diff --git a/decoder.layers.36.self_attn.catted_head_biases b/decoder.layers.36.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..115d64dd944d08e20ffb8a8ead13d71c81b2e58a Binary files /dev/null and b/decoder.layers.36.self_attn.catted_head_biases differ diff --git a/decoder.layers.36.self_attn.catted_head_weights b/decoder.layers.36.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..0f61a37263b027a224081911169bbfb3e896ae05 --- /dev/null +++ b/decoder.layers.36.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371769a5866acff46a3b6231a3cdb22541405bbbd83fec451d658015858515c9 +size 411041792 diff --git a/decoder.layers.36.self_attn.out_proj.bias b/decoder.layers.36.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..9bca8b85c037df4b791d3daf09c071a37f98d203 Binary files /dev/null and b/decoder.layers.36.self_attn.out_proj.bias differ diff --git a/decoder.layers.36.self_attn_layer_norm.bias b/decoder.layers.36.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..47664dfdf14292832ba2e155644bb02c98998726 Binary files /dev/null and b/decoder.layers.36.self_attn_layer_norm.bias differ diff --git a/decoder.layers.36.self_attn_layer_norm.weight b/decoder.layers.36.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.36.self_attn_layer_norm.weight differ diff --git a/decoder.layers.37.attn-head-sparsity-predictor.1.weight b/decoder.layers.37.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..9323bb76ac29084b68d8b298a131233df57aa5c7 --- /dev/null +++ b/decoder.layers.37.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1302a42f935dea08e4a98e6281088021f6bd601a1192c38d12988cee3bc269be +size 28673513 diff --git a/decoder.layers.37.attn-head-sparsity-predictor.2.weight b/decoder.layers.37.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..4fe61f27d50424336468d9c5641ce8cdb452d577 Binary files /dev/null and b/decoder.layers.37.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.37.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.37.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..07b947b34d2f40261f0e85296f2d44f02a127ec3 --- /dev/null +++ b/decoder.layers.37.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a950c03e1127306cbd7f77b02bd22ce4ea0afa14d941d14a93d1e361da2af4c +size 28673508 diff --git a/decoder.layers.37.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.37.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..10991501da5d40156aea6b305f667b9be19c0498 --- /dev/null +++ b/decoder.layers.37.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09bad170d7a945beaac19064f574d8042b6c23160c218f9af5eeeb8ffcb665ae +size 114689508 diff --git a/decoder.layers.37.fc1.bias b/decoder.layers.37.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..ea90c1afafde550a93c014a330bac50f26769907 Binary files /dev/null and b/decoder.layers.37.fc1.bias differ diff --git a/decoder.layers.37.fc1.weight b/decoder.layers.37.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..6d126e00e5c191edba1b9e9c05687112d673c402 --- /dev/null +++ b/decoder.layers.37.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1b4050efce6cce80527dae746ae0212a3da03268896029ab04caa6cc4f770f +size 411041792 diff --git a/decoder.layers.37.fc2.bias b/decoder.layers.37.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..233784ae56aa71667de02325b27765597358f826 Binary files /dev/null and b/decoder.layers.37.fc2.bias differ diff --git a/decoder.layers.37.fc2.weight b/decoder.layers.37.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..57ec8b0c32cc35458e852e8ec5ffc647c630c24b --- /dev/null +++ b/decoder.layers.37.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2f1b27ef716bd2ad386862ef152c31503a567cc346bfe15bb62358beb9ba20 +size 411041792 diff --git a/decoder.layers.37.final_layer_norm.bias b/decoder.layers.37.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..56a8d77f92a3f7da15fadaad96d7cd586abe3f5c Binary files /dev/null and b/decoder.layers.37.final_layer_norm.bias differ diff --git a/decoder.layers.37.final_layer_norm.weight b/decoder.layers.37.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.37.final_layer_norm.weight differ diff --git a/decoder.layers.37.self_attn.catted_head_biases b/decoder.layers.37.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..634babed9780d4dbcd9aca95883bb8c168fa8ce4 Binary files /dev/null and b/decoder.layers.37.self_attn.catted_head_biases differ diff --git a/decoder.layers.37.self_attn.catted_head_weights b/decoder.layers.37.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..e11839abf1d576c25a99efcdaf5813c02c1e2616 --- /dev/null +++ b/decoder.layers.37.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dde0cebeed0921968e9a3fe5920b2aa165a23788b07a43f30d94d2fd63a0c44 +size 411041792 diff --git a/decoder.layers.37.self_attn.out_proj.bias b/decoder.layers.37.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..33025ab67b70614a36ce04a7730bd838c390f6da Binary files /dev/null and b/decoder.layers.37.self_attn.out_proj.bias differ diff --git a/decoder.layers.37.self_attn_layer_norm.bias b/decoder.layers.37.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..694dbfa794820e4ca82d5ad9851b0f28eee9a77e Binary files /dev/null and b/decoder.layers.37.self_attn_layer_norm.bias differ diff --git a/decoder.layers.37.self_attn_layer_norm.weight b/decoder.layers.37.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.37.self_attn_layer_norm.weight differ diff --git a/decoder.layers.38.attn-head-sparsity-predictor.1.weight b/decoder.layers.38.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..bbb8b445ef7108ee1ebc690eab59b8c937cbf84d --- /dev/null +++ b/decoder.layers.38.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7d0c31f27e1847c233ad12a4f8a4d03a38fd85745cf70afd7ab6aa10ca9d69 +size 28673513 diff --git a/decoder.layers.38.attn-head-sparsity-predictor.2.weight b/decoder.layers.38.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..f519b9fb1e7f9586e56951eb6a82bb934f354d53 Binary files /dev/null and b/decoder.layers.38.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.38.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.38.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d5c17753eb5e64dcd38cef89803076b68359035e --- /dev/null +++ b/decoder.layers.38.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13517b4c596046e85bccfa224df224366050ed435dfcec76909771b2e2ecafef +size 28673508 diff --git a/decoder.layers.38.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.38.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..a67d25432ed82f1e436b5a9219302eb90440a4a5 --- /dev/null +++ b/decoder.layers.38.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1dbd45d3a981ca0f43a6d5f8b8f01cc2ec28b72a4572c2639eab7f2b97308c4 +size 114689508 diff --git a/decoder.layers.38.fc1.bias b/decoder.layers.38.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..37ae91afc4a4a6ec457a548cc0fc931de8bd0212 Binary files /dev/null and b/decoder.layers.38.fc1.bias differ diff --git a/decoder.layers.38.fc1.weight b/decoder.layers.38.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..fd2f54a361de09f77521ebf1f03e7437d3ca0092 --- /dev/null +++ b/decoder.layers.38.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b10c9b14cf72b72170f15da7ec393fcb10a303a45013ca32c36eb65936200bae +size 411041792 diff --git a/decoder.layers.38.fc2.bias b/decoder.layers.38.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..cc47a2ec9cbde97517c1db5d1cdeeeba126b81a7 Binary files /dev/null and b/decoder.layers.38.fc2.bias differ diff --git a/decoder.layers.38.fc2.weight b/decoder.layers.38.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..fa994348e79a70fe41c6a9d03b30b7b6d80adfb2 --- /dev/null +++ b/decoder.layers.38.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc0c0d84dfb9bdc2654640ed69576439592ff9e59e1ab5bcb9689ba6980ee4b +size 411041792 diff --git a/decoder.layers.38.final_layer_norm.bias b/decoder.layers.38.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..e3dd1a91b41346e98d4c61bd1792b7b5e0362690 Binary files /dev/null and b/decoder.layers.38.final_layer_norm.bias differ diff --git a/decoder.layers.38.final_layer_norm.weight b/decoder.layers.38.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.38.final_layer_norm.weight differ diff --git a/decoder.layers.38.self_attn.catted_head_biases b/decoder.layers.38.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..a0086adb1d60498a261a8cf25a7bc9a0d0b4fba2 Binary files /dev/null and b/decoder.layers.38.self_attn.catted_head_biases differ diff --git a/decoder.layers.38.self_attn.catted_head_weights b/decoder.layers.38.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..808d4f3ad4c3d5a3556af2a7bd56f147aa417b18 --- /dev/null +++ b/decoder.layers.38.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346e413360eb46c773b42fa0060e65c0171a22ea04247ddd23221631c24d4431 +size 411041792 diff --git a/decoder.layers.38.self_attn.out_proj.bias b/decoder.layers.38.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..07e79f4bc07b9b4d580b5d1934913ce8a6c2adc4 Binary files /dev/null and b/decoder.layers.38.self_attn.out_proj.bias differ diff --git a/decoder.layers.38.self_attn_layer_norm.bias b/decoder.layers.38.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..a6031eea984482e77f055e0f73fc58aee1697902 Binary files /dev/null and b/decoder.layers.38.self_attn_layer_norm.bias differ diff --git a/decoder.layers.38.self_attn_layer_norm.weight b/decoder.layers.38.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.38.self_attn_layer_norm.weight differ diff --git a/decoder.layers.39.attn-head-sparsity-predictor.1.weight b/decoder.layers.39.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d4ee7a1849d5c270faf2502aff6925babd8dfa65 --- /dev/null +++ b/decoder.layers.39.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d290f6b2e34a24f6d31eb0db23ca4175877a4d5604f52512d43f1c86c69effc +size 28673513 diff --git a/decoder.layers.39.attn-head-sparsity-predictor.2.weight b/decoder.layers.39.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7c6dd422703d73935f1d35d0bcafab69e3ab4206 Binary files /dev/null and b/decoder.layers.39.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.39.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.39.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..8a71764278da675a05e7f03aeaa33ee500a0e398 --- /dev/null +++ b/decoder.layers.39.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8434cde7a9f0ca4c85ca89f9e867782fb481e4cd45077a51b1947d10ab469f51 +size 28673508 diff --git a/decoder.layers.39.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.39.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..ddda860b1510fdaaa16ecb994f62fa0e873da8a4 --- /dev/null +++ b/decoder.layers.39.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b14d6f7ec6fe78649287ce8b7f437a4059e68d3593d1d96294ce7627a9cabe4 +size 114689508 diff --git a/decoder.layers.39.fc1.bias b/decoder.layers.39.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..4a050361fd5faadbcf93e207df837778e3dbb652 Binary files /dev/null and b/decoder.layers.39.fc1.bias differ diff --git a/decoder.layers.39.fc1.weight b/decoder.layers.39.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..4d0750556bd4ee10ab102f6ed91344168afddbb5 --- /dev/null +++ b/decoder.layers.39.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:549f47ba2b784b3fbe6125a9b73eb72bc1583c1a75236b761e6c6d2f63716ef0 +size 411041792 diff --git a/decoder.layers.39.fc2.bias b/decoder.layers.39.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..d3b61a013c395f889b2241b7cc726ac5f6f77900 Binary files /dev/null and b/decoder.layers.39.fc2.bias differ diff --git a/decoder.layers.39.fc2.weight b/decoder.layers.39.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..15be2b730663ec5671e52ce62aeb3d3b367a3973 --- /dev/null +++ b/decoder.layers.39.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd985db44c56bc18a57470844163b439f7563540cfa08583146bf1305e1e5d0 +size 411041792 diff --git a/decoder.layers.39.final_layer_norm.bias b/decoder.layers.39.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..d54358596e187062618e5481fd2bd2cb8766b29f Binary files /dev/null and b/decoder.layers.39.final_layer_norm.bias differ diff --git a/decoder.layers.39.final_layer_norm.weight b/decoder.layers.39.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.39.final_layer_norm.weight differ diff --git a/decoder.layers.39.self_attn.catted_head_biases b/decoder.layers.39.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..ca9ee02a44b600569881109f5473e13055d69b5e Binary files /dev/null and b/decoder.layers.39.self_attn.catted_head_biases differ diff --git a/decoder.layers.39.self_attn.catted_head_weights b/decoder.layers.39.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..bf039d7c1cfd0bd2effce1376d20fcaacf5c706c --- /dev/null +++ b/decoder.layers.39.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab0c943fcf3b2bf6b1920a462e8a34220bc7358721bc610756a34ed4fff9524 +size 411041792 diff --git a/decoder.layers.39.self_attn.out_proj.bias b/decoder.layers.39.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..4bc9da5855b84c03e779d5af3f6efdebb1a99180 Binary files /dev/null and b/decoder.layers.39.self_attn.out_proj.bias differ diff --git a/decoder.layers.39.self_attn_layer_norm.bias b/decoder.layers.39.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..c622a637bde31dead7591909eca368980efcec05 Binary files /dev/null and b/decoder.layers.39.self_attn_layer_norm.bias differ diff --git a/decoder.layers.39.self_attn_layer_norm.weight b/decoder.layers.39.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.39.self_attn_layer_norm.weight differ diff --git a/decoder.layers.4.attn-head-sparsity-predictor.1.weight b/decoder.layers.4.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..f1b25314dfc3bc0b7f0c4cd1615eb1fc4f21148c --- /dev/null +++ b/decoder.layers.4.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:926e82bc03fb9add0936cece9fef7cb2010c60552376ab7bb777a4c289167672 +size 28673508 diff --git a/decoder.layers.4.attn-head-sparsity-predictor.2.weight b/decoder.layers.4.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..6cdf4588dc2c366e258f98a71fd3def49fb0cb51 Binary files /dev/null and b/decoder.layers.4.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.4.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.4.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..a8f17dfbf08cdc97672f4fa5423b7a9f9da16d40 --- /dev/null +++ b/decoder.layers.4.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7824b69049c5ad994543623a2b5ea7f66e262aed86b04d7809c201ab1fe20b8d +size 28673503 diff --git a/decoder.layers.4.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.4.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..1abbd0929e8b631c1fa44295437170fd30afdb73 --- /dev/null +++ b/decoder.layers.4.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:728fc342433a9656b0862795acddb265decf078ad02d22b47adc52af58b3f525 +size 114689503 diff --git a/decoder.layers.4.fc1.bias b/decoder.layers.4.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..c0c5fdd1166773fdb0d1f70bb5d035f6a7e00a3e Binary files /dev/null and b/decoder.layers.4.fc1.bias differ diff --git a/decoder.layers.4.fc1.weight b/decoder.layers.4.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..8e69f7fa91216c32cd46dc0903ce302e5fbba89a --- /dev/null +++ b/decoder.layers.4.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d483812fdff260410db440b26e84da0665e02d8135d5f95398131fa5278d0ffc +size 411041792 diff --git a/decoder.layers.4.fc2.bias b/decoder.layers.4.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..c56d1953aa4771e23a754768ae2aa6a599acf6ee Binary files /dev/null and b/decoder.layers.4.fc2.bias differ diff --git a/decoder.layers.4.fc2.weight b/decoder.layers.4.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..2fc7d5455d233803179a0a2ec4d0c3d0152fba19 --- /dev/null +++ b/decoder.layers.4.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ae7eab2dd1819643ea4cb855d839954dadb450b8e1099990afa7ba96d90039 +size 411041792 diff --git a/decoder.layers.4.final_layer_norm.bias b/decoder.layers.4.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..3c59123ee5aa11d9d8aeb26191bf1fd79ee6deed Binary files /dev/null and b/decoder.layers.4.final_layer_norm.bias differ diff --git a/decoder.layers.4.final_layer_norm.weight b/decoder.layers.4.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.4.final_layer_norm.weight differ diff --git a/decoder.layers.4.self_attn.catted_head_biases b/decoder.layers.4.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..950fdfb534ec763879e4e97bb783c57cbcbf7eeb Binary files /dev/null and b/decoder.layers.4.self_attn.catted_head_biases differ diff --git a/decoder.layers.4.self_attn.catted_head_weights b/decoder.layers.4.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..d99b919c247ae48d530787e974eb821c9636e24e --- /dev/null +++ b/decoder.layers.4.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34060292afa1dad8015521951bfb75ba07acaaa07001f579da6b6f980900aa19 +size 411041792 diff --git a/decoder.layers.4.self_attn.out_proj.bias b/decoder.layers.4.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..a3dbbbf90c079cc3b2432a69932d20edaf104e70 Binary files /dev/null and b/decoder.layers.4.self_attn.out_proj.bias differ diff --git a/decoder.layers.4.self_attn_layer_norm.bias b/decoder.layers.4.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..b947506a27d5ef14b8edbc537670db092683195b Binary files /dev/null and b/decoder.layers.4.self_attn_layer_norm.bias differ diff --git a/decoder.layers.4.self_attn_layer_norm.weight b/decoder.layers.4.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.4.self_attn_layer_norm.weight differ diff --git a/decoder.layers.40.attn-head-sparsity-predictor.1.weight b/decoder.layers.40.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..90187a41d4b7e6de93ec291bf6a05191cc844a2e --- /dev/null +++ b/decoder.layers.40.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa4f8c6136b832b841b3366085f2899df8fad817b45153722ce2627154607f36 +size 28673513 diff --git a/decoder.layers.40.attn-head-sparsity-predictor.2.weight b/decoder.layers.40.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..6e0a2984a209df6745bcfe1484e6246ef07ec8f7 Binary files /dev/null and b/decoder.layers.40.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.40.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.40.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d42e5fa3d09e37da9d5e58a5ab376a12ec173fe4 --- /dev/null +++ b/decoder.layers.40.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e01cfe09457787d0819982a331d1fe98ae0f9f69e82aea86d351bb35164e33 +size 28673508 diff --git a/decoder.layers.40.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.40.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..5b252430371fe91f5bcb045097f0ba63bc9b6223 --- /dev/null +++ b/decoder.layers.40.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2a29758f55f73d3fa09120fba848d1e9d831f9048677f58fc6a7c3f0875a9d +size 114689508 diff --git a/decoder.layers.40.fc1.bias b/decoder.layers.40.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..1676b0282431038c0be62030b887d3b1dcbb6373 Binary files /dev/null and b/decoder.layers.40.fc1.bias differ diff --git a/decoder.layers.40.fc1.weight b/decoder.layers.40.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..202336e023cc1417b1e730c524a2ccaeb9671620 --- /dev/null +++ b/decoder.layers.40.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c198539d908842133e2ec86fe4fff825788fd2716774a6edb00e520e44893ff +size 411041792 diff --git a/decoder.layers.40.fc2.bias b/decoder.layers.40.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..066442b5a28f44426ba68972dfc29fdde0bcf97a Binary files /dev/null and b/decoder.layers.40.fc2.bias differ diff --git a/decoder.layers.40.fc2.weight b/decoder.layers.40.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..cee70d325fb218ce714119ee14d2390c688d9441 --- /dev/null +++ b/decoder.layers.40.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ebe76c17eafbf21be230abcc717f215c25e113dcc8d36cc5126377bc830027 +size 411041792 diff --git a/decoder.layers.40.final_layer_norm.bias b/decoder.layers.40.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..77c85cd78f1f63102b7e502a03d61f42d953b401 Binary files /dev/null and b/decoder.layers.40.final_layer_norm.bias differ diff --git a/decoder.layers.40.final_layer_norm.weight b/decoder.layers.40.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.40.final_layer_norm.weight differ diff --git a/decoder.layers.40.self_attn.catted_head_biases b/decoder.layers.40.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..94f89a72166135f1fb313f338d3a4ed5232fed69 Binary files /dev/null and b/decoder.layers.40.self_attn.catted_head_biases differ diff --git a/decoder.layers.40.self_attn.catted_head_weights b/decoder.layers.40.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..4ff438b49ad2a042d75a18f9f422ba7287367c5e --- /dev/null +++ b/decoder.layers.40.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05be97bd7eb9423942b16a558f2e7de40eae48c4fe08b72fa21bb1988d56b68 +size 411041792 diff --git a/decoder.layers.40.self_attn.out_proj.bias b/decoder.layers.40.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..eefa60b4512d9ded176bfd39bf89c54ef22824dd Binary files /dev/null and b/decoder.layers.40.self_attn.out_proj.bias differ diff --git a/decoder.layers.40.self_attn_layer_norm.bias b/decoder.layers.40.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..5786c12c7c7c46698208bf7cc31e35e45ad289e1 Binary files /dev/null and b/decoder.layers.40.self_attn_layer_norm.bias differ diff --git a/decoder.layers.40.self_attn_layer_norm.weight b/decoder.layers.40.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.40.self_attn_layer_norm.weight differ diff --git a/decoder.layers.41.attn-head-sparsity-predictor.1.weight b/decoder.layers.41.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..4f82830a35d6d2311af3c9b8b9c8a5bd7a18756f --- /dev/null +++ b/decoder.layers.41.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20518e97c6cdb1ced0e68a44400d0c2b31c469de93e128caa553a9f352738d6 +size 28673513 diff --git a/decoder.layers.41.attn-head-sparsity-predictor.2.weight b/decoder.layers.41.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..35db5dbcca6a62dc7b8d6d3cf6288469830b7c02 Binary files /dev/null and b/decoder.layers.41.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.41.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.41.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..e12cd20dca3cd32dcb5b29a79e5be809f744802e --- /dev/null +++ b/decoder.layers.41.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae91ba56ec6973297fc11f4eca7f8527beb3c936388a7d78438977d29cdd4cb7 +size 28673508 diff --git a/decoder.layers.41.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.41.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..4d558e3ad56c5947cc491e3ddba5007f36815b4f --- /dev/null +++ b/decoder.layers.41.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481bede62a7f0fb3b241012c3237d902d8b048aa890676421f65a401d54175f6 +size 114689508 diff --git a/decoder.layers.41.fc1.bias b/decoder.layers.41.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..6c60289b14b7384df8f4dc7b01d948f53d028aaa Binary files /dev/null and b/decoder.layers.41.fc1.bias differ diff --git a/decoder.layers.41.fc1.weight b/decoder.layers.41.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..0aa166b1c3b89055554437e46f2ff7fbddf15b99 --- /dev/null +++ b/decoder.layers.41.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fbb7ce2e2c7c5b8d3f00639ae266c8a8a866c8461dbc96eba42d2093ff95b0d +size 411041792 diff --git a/decoder.layers.41.fc2.bias b/decoder.layers.41.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..38e51340e1dc854f66d514a12d5884818b63f483 Binary files /dev/null and b/decoder.layers.41.fc2.bias differ diff --git a/decoder.layers.41.fc2.weight b/decoder.layers.41.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..2d7de70c53f31dce4c3bffc6d97fecf773599581 --- /dev/null +++ b/decoder.layers.41.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2da251d92b664b3985a9c064e41b9deac5c6342d653755012c12ab9ac09d9460 +size 411041792 diff --git a/decoder.layers.41.final_layer_norm.bias b/decoder.layers.41.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..781631e62b3faddae3b7d9a1f9b547a61514c7da Binary files /dev/null and b/decoder.layers.41.final_layer_norm.bias differ diff --git a/decoder.layers.41.final_layer_norm.weight b/decoder.layers.41.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.41.final_layer_norm.weight differ diff --git a/decoder.layers.41.self_attn.catted_head_biases b/decoder.layers.41.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..24868e8c30e80c9c281b9d11cd8641ae2589e6c3 Binary files /dev/null and b/decoder.layers.41.self_attn.catted_head_biases differ diff --git a/decoder.layers.41.self_attn.catted_head_weights b/decoder.layers.41.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..373451e939497057889d5793ad98c7c4d457c53b --- /dev/null +++ b/decoder.layers.41.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:754570f5384638d1bb2fb7a7dda883a59f9aba52ee0f77cbb2a563c686ea2a14 +size 411041792 diff --git a/decoder.layers.41.self_attn.out_proj.bias b/decoder.layers.41.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..23d851edd3b7c2cb971156cbb185ed25d0c5ebea Binary files /dev/null and b/decoder.layers.41.self_attn.out_proj.bias differ diff --git a/decoder.layers.41.self_attn_layer_norm.bias b/decoder.layers.41.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..66262293487f9097a9c459a6aab2032834e5f752 Binary files /dev/null and b/decoder.layers.41.self_attn_layer_norm.bias differ diff --git a/decoder.layers.41.self_attn_layer_norm.weight b/decoder.layers.41.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.41.self_attn_layer_norm.weight differ diff --git a/decoder.layers.42.attn-head-sparsity-predictor.1.weight b/decoder.layers.42.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..e2d9a37597da9100b0e380c0c3adaa2bc93b29ef --- /dev/null +++ b/decoder.layers.42.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4491ce511b41bf7cb6e49875096beaa2e9717064be9b918ac5ddbd615ca6dc0c +size 28673513 diff --git a/decoder.layers.42.attn-head-sparsity-predictor.2.weight b/decoder.layers.42.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..adee5ad06ed60aca4be8e2c6746e260677d0764a Binary files /dev/null and b/decoder.layers.42.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.42.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.42.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..89d333b64fed889912c580e57f1c4cd74456b33f --- /dev/null +++ b/decoder.layers.42.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d079f294ccc79eb02abb71ccc9c158c3074ee2ba72acad174cdb3ff9b21288b5 +size 28673508 diff --git a/decoder.layers.42.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.42.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..b37294c1f492d9faaf143b1a330c85b5c857cfd8 --- /dev/null +++ b/decoder.layers.42.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b049fb4af7b40685b63ae13d820a0b790ff626248999ce8982f0bfb8ca4761b1 +size 114689508 diff --git a/decoder.layers.42.fc1.bias b/decoder.layers.42.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..2bc406399650b6145d576ea16e0683c33f371d4a Binary files /dev/null and b/decoder.layers.42.fc1.bias differ diff --git a/decoder.layers.42.fc1.weight b/decoder.layers.42.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..f5949da8112c704ad15b2a26976c927008edca22 --- /dev/null +++ b/decoder.layers.42.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b24c5d1fb070425c40773fae9173fe7ab92f34de08142162467460c4343463b +size 411041792 diff --git a/decoder.layers.42.fc2.bias b/decoder.layers.42.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..ef36ee66a94324a1901a1133b7ede8e8422b0f30 Binary files /dev/null and b/decoder.layers.42.fc2.bias differ diff --git a/decoder.layers.42.fc2.weight b/decoder.layers.42.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..64d7c4eaf316c6aff59aa92f5687fac3d4eb8427 --- /dev/null +++ b/decoder.layers.42.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b1fe2c3dc567e3e42ad7cc6552f95c1ae8b32f2f4b64691e3315861d083409 +size 411041792 diff --git a/decoder.layers.42.final_layer_norm.bias b/decoder.layers.42.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..e25819c3d5231c1c49ea40746d8e3164749decaf Binary files /dev/null and b/decoder.layers.42.final_layer_norm.bias differ diff --git a/decoder.layers.42.final_layer_norm.weight b/decoder.layers.42.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.42.final_layer_norm.weight differ diff --git a/decoder.layers.42.self_attn.catted_head_biases b/decoder.layers.42.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..e56b980221891400b5c502e87b7c1973d808c9f1 Binary files /dev/null and b/decoder.layers.42.self_attn.catted_head_biases differ diff --git a/decoder.layers.42.self_attn.catted_head_weights b/decoder.layers.42.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..51331f00993e962cef760de6795202f1e7667be6 --- /dev/null +++ b/decoder.layers.42.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8490ea543285c5036d762fc2c264553870c9d70b995871422f022f0b7e39e983 +size 411041792 diff --git a/decoder.layers.42.self_attn.out_proj.bias b/decoder.layers.42.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..b0847e634803c4966b6faab4ba31016475c52c7f Binary files /dev/null and b/decoder.layers.42.self_attn.out_proj.bias differ diff --git a/decoder.layers.42.self_attn_layer_norm.bias b/decoder.layers.42.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..24f80ed4ccb3620815dea5827e95193c0b25c736 Binary files /dev/null and b/decoder.layers.42.self_attn_layer_norm.bias differ diff --git a/decoder.layers.42.self_attn_layer_norm.weight b/decoder.layers.42.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.42.self_attn_layer_norm.weight differ diff --git a/decoder.layers.43.attn-head-sparsity-predictor.1.weight b/decoder.layers.43.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..d06b34316ca8037b4b07974a8e150d1f8e874e88 --- /dev/null +++ b/decoder.layers.43.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c227fdcf8f4360beca1e46ff9c311cd17906a9373d50ed47b158771ca582162 +size 28673513 diff --git a/decoder.layers.43.attn-head-sparsity-predictor.2.weight b/decoder.layers.43.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..6134e6f9dd3b548a9ecca6af6b950a8376af4391 Binary files /dev/null and b/decoder.layers.43.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.43.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.43.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..136c3b0a6d3323073538b18c27a4518420fd114c --- /dev/null +++ b/decoder.layers.43.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3198693d34c0cf25ec354c01b00cfd09325dec1f6daf00815faab8560d63361 +size 28673508 diff --git a/decoder.layers.43.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.43.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..4d6a801b9d0c3a26a4bf37e4432c94cd40a4e966 --- /dev/null +++ b/decoder.layers.43.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad9ab09e540e55a5266df19a90108bbf66842818dba66eba62c63cd3b0509078 +size 114689508 diff --git a/decoder.layers.43.fc1.bias b/decoder.layers.43.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..426339620dae7a11f44be5bf83c7147eda0f1e06 Binary files /dev/null and b/decoder.layers.43.fc1.bias differ diff --git a/decoder.layers.43.fc1.weight b/decoder.layers.43.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..62ca2418570eda41ab2137cf64295e341f12b204 --- /dev/null +++ b/decoder.layers.43.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807ec7c9a099baab190decc6c466309ff6d045ab839c610611cb4e5232987c9d +size 411041792 diff --git a/decoder.layers.43.fc2.bias b/decoder.layers.43.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..5b68af370278571631f5b2e022a067b8fe74e88c Binary files /dev/null and b/decoder.layers.43.fc2.bias differ diff --git a/decoder.layers.43.fc2.weight b/decoder.layers.43.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..948b9d9ba19cb1e7c27e6b94dc4da4f571f224bf --- /dev/null +++ b/decoder.layers.43.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:985d417abe3379ba8d0cb120acb8fc35c6f0f1ba28c77ffaebc19d7170d72f69 +size 411041792 diff --git a/decoder.layers.43.final_layer_norm.bias b/decoder.layers.43.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..41b58dfa85f7ea3669655f95b96d4787e7bfea80 Binary files /dev/null and b/decoder.layers.43.final_layer_norm.bias differ diff --git a/decoder.layers.43.final_layer_norm.weight b/decoder.layers.43.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.43.final_layer_norm.weight differ diff --git a/decoder.layers.43.self_attn.catted_head_biases b/decoder.layers.43.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..dc7bbfd9e639fa4e6a8f570dddec8522373ae984 Binary files /dev/null and b/decoder.layers.43.self_attn.catted_head_biases differ diff --git a/decoder.layers.43.self_attn.catted_head_weights b/decoder.layers.43.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..ded5a10a1e0a233f4c9b3147ed7764a88aadd793 --- /dev/null +++ b/decoder.layers.43.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aae2a4b1a3fde313f3da2cf51fa2696dd3fb1c59954abfab12e1f1dbaea1ec44 +size 411041792 diff --git a/decoder.layers.43.self_attn.out_proj.bias b/decoder.layers.43.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..b0344336bffddd14f1c6e09c2f05e0070d5f60d9 Binary files /dev/null and b/decoder.layers.43.self_attn.out_proj.bias differ diff --git a/decoder.layers.43.self_attn_layer_norm.bias b/decoder.layers.43.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..9bde88d28da81a4d3b88ca7d54e941bed8466b81 Binary files /dev/null and b/decoder.layers.43.self_attn_layer_norm.bias differ diff --git a/decoder.layers.43.self_attn_layer_norm.weight b/decoder.layers.43.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.43.self_attn_layer_norm.weight differ diff --git a/decoder.layers.44.attn-head-sparsity-predictor.1.weight b/decoder.layers.44.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..652ebbb67efc237123803803dd410ec411f07319 --- /dev/null +++ b/decoder.layers.44.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616516266420e22156d80cf987abdc7fa48ffc71e92d04561463cefae7a6828b +size 28673513 diff --git a/decoder.layers.44.attn-head-sparsity-predictor.2.weight b/decoder.layers.44.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..da00f291d17e3f2de7f4ec2d5413472370162476 Binary files /dev/null and b/decoder.layers.44.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.44.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.44.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..61d5a732c4b4cb6145e7b5ba1b33028507978e42 --- /dev/null +++ b/decoder.layers.44.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd01c047c1288d8bf2ebda1f4344373ba4d727cbd7a954310bc159446993255 +size 28673508 diff --git a/decoder.layers.44.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.44.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..144dc4b27b0ff458c6cda0fc41bbd656581e6e23 --- /dev/null +++ b/decoder.layers.44.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b14113d4bd39cec343d266f816cfa43f685648db2213ce890b94628bacdbf38 +size 114689508 diff --git a/decoder.layers.44.fc1.bias b/decoder.layers.44.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..5e70355e50f5bcbd682afc1fc73847cbbd7188ba Binary files /dev/null and b/decoder.layers.44.fc1.bias differ diff --git a/decoder.layers.44.fc1.weight b/decoder.layers.44.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..14491f90d73ce8f984b478023ed466f7489c3dfb --- /dev/null +++ b/decoder.layers.44.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b163e1f7ac26f488a033b31ed0589aa5e9d175df9e62770991a961c2cd9b97b +size 411041792 diff --git a/decoder.layers.44.fc2.bias b/decoder.layers.44.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..1b3bb81db93a85ece1cdab5c616006aa7d8adbef Binary files /dev/null and b/decoder.layers.44.fc2.bias differ diff --git a/decoder.layers.44.fc2.weight b/decoder.layers.44.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..267888fb00e768d4d0f3d3c93f9f0a104c808fca --- /dev/null +++ b/decoder.layers.44.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bf8fbabab8c2f826266667dc27baae79def94b70ed8937647f538b5ce6eb6d +size 411041792 diff --git a/decoder.layers.44.final_layer_norm.bias b/decoder.layers.44.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..70fd78c32219ad4ab3c19a108cdd242af1ad34cc Binary files /dev/null and b/decoder.layers.44.final_layer_norm.bias differ diff --git a/decoder.layers.44.final_layer_norm.weight b/decoder.layers.44.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.44.final_layer_norm.weight differ diff --git a/decoder.layers.44.self_attn.catted_head_biases b/decoder.layers.44.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..65d5885ee66fbe0ea46bbaa4004db3e63db18b93 Binary files /dev/null and b/decoder.layers.44.self_attn.catted_head_biases differ diff --git a/decoder.layers.44.self_attn.catted_head_weights b/decoder.layers.44.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..9336c33ba20fa28d2cc1dda1b973ec657c8abec5 --- /dev/null +++ b/decoder.layers.44.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef7ae95f8089e6729a7cf35dd09f91efb641c8ac2eadd2fa732f9720f90ca48 +size 411041792 diff --git a/decoder.layers.44.self_attn.out_proj.bias b/decoder.layers.44.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..c7add292bcd2e0eb96673aa8e7e2aa58fa6d62f5 Binary files /dev/null and b/decoder.layers.44.self_attn.out_proj.bias differ diff --git a/decoder.layers.44.self_attn_layer_norm.bias b/decoder.layers.44.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..0bf4e55e85ee72a21fbf8fa90f76f556fe8e1b64 Binary files /dev/null and b/decoder.layers.44.self_attn_layer_norm.bias differ diff --git a/decoder.layers.44.self_attn_layer_norm.weight b/decoder.layers.44.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.44.self_attn_layer_norm.weight differ diff --git a/decoder.layers.45.attn-head-sparsity-predictor.1.weight b/decoder.layers.45.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b7081a68b1eca3bf9742e27d6e106ddfb26fefd7 --- /dev/null +++ b/decoder.layers.45.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d12c6e70d39ee9d0e17af0700825c2b6d49abc67bca550f892ee28fd422a2bb +size 28673513 diff --git a/decoder.layers.45.attn-head-sparsity-predictor.2.weight b/decoder.layers.45.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..36c3acab150e54e479fda05b7b6429a605658797 Binary files /dev/null and b/decoder.layers.45.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.45.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.45.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..5047290c81533f2f17ff770c5f93be1ed6f277dc --- /dev/null +++ b/decoder.layers.45.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1018581b4869d86f0671dd9bf3ba555f41a0c3b6744e065453390ea64a34b445 +size 28673508 diff --git a/decoder.layers.45.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.45.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..49c5b3ceb3b042a7ede1f2f96ccfa791fad113c0 --- /dev/null +++ b/decoder.layers.45.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e0083c851b800700c00f87231520083b05e7539731e1d619f1414e9aba4a16 +size 114689508 diff --git a/decoder.layers.45.fc1.bias b/decoder.layers.45.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..330fb97d7a43944ae848c426eee50ea92f0d230f Binary files /dev/null and b/decoder.layers.45.fc1.bias differ diff --git a/decoder.layers.45.fc1.weight b/decoder.layers.45.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..81a36e72d4b6a9618f4e46b0587ae967a36bdcbe --- /dev/null +++ b/decoder.layers.45.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af05c778573752689d01f15e0200190889c6447a11b6f247f330d193c4a13e2 +size 411041792 diff --git a/decoder.layers.45.fc2.bias b/decoder.layers.45.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..eb7e2b0781bb03d8d1ada3b78171c208a8038382 Binary files /dev/null and b/decoder.layers.45.fc2.bias differ diff --git a/decoder.layers.45.fc2.weight b/decoder.layers.45.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..26bea7497db867c603e868c6c4eb19b279612a41 --- /dev/null +++ b/decoder.layers.45.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d4d1357d64f977bc9e926d7c5e46c59ca5095d74a3f48fc07fb80a0c131b93 +size 411041792 diff --git a/decoder.layers.45.final_layer_norm.bias b/decoder.layers.45.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..4b2ff0a78ae46f0333bf93bb82cbf886a2e4c06c Binary files /dev/null and b/decoder.layers.45.final_layer_norm.bias differ diff --git a/decoder.layers.45.final_layer_norm.weight b/decoder.layers.45.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.45.final_layer_norm.weight differ diff --git a/decoder.layers.45.self_attn.catted_head_biases b/decoder.layers.45.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..91d5928b90043e92f8f715f2cf7043a548053875 Binary files /dev/null and b/decoder.layers.45.self_attn.catted_head_biases differ diff --git a/decoder.layers.45.self_attn.catted_head_weights b/decoder.layers.45.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..6f226986bd6449231f7ed2ff324745a21369aaba --- /dev/null +++ b/decoder.layers.45.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5e6fa4fcfb89b1f416fd10d02aab7354c9f467fd73ce935861da8d0645870b3 +size 411041792 diff --git a/decoder.layers.45.self_attn.out_proj.bias b/decoder.layers.45.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..b6e79403e96c0ade3c1a58df89007b66e33cf1b8 Binary files /dev/null and b/decoder.layers.45.self_attn.out_proj.bias differ diff --git a/decoder.layers.45.self_attn_layer_norm.bias b/decoder.layers.45.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..378508df863fca2bb0ed0db8462fc6e26b0f1634 Binary files /dev/null and b/decoder.layers.45.self_attn_layer_norm.bias differ diff --git a/decoder.layers.45.self_attn_layer_norm.weight b/decoder.layers.45.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.45.self_attn_layer_norm.weight differ diff --git a/decoder.layers.46.attn-head-sparsity-predictor.1.weight b/decoder.layers.46.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..070ab0640497cbde57044b5702f4c6ca14c278b6 --- /dev/null +++ b/decoder.layers.46.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245e80856e6db43ee758f58fbf2adeb7e3482735ef36c05aff517c7f86753869 +size 28673513 diff --git a/decoder.layers.46.attn-head-sparsity-predictor.2.weight b/decoder.layers.46.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..a9e84f1c4f13a07ec3832e0fa304d3d4e411af4b Binary files /dev/null and b/decoder.layers.46.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.46.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.46.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..2a9b414fa62df03b29cf52b89769c91049363edd --- /dev/null +++ b/decoder.layers.46.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9508e1a2be6060c4080b69594bedbaefd567d096b3442cf3c1f8a736776d17 +size 28673508 diff --git a/decoder.layers.46.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.46.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..37b8280076cb56da34fa87e27a64109fb33acb99 --- /dev/null +++ b/decoder.layers.46.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309bd5d00c6b264fa473f01986e2bbe9a96e250b30b6551ddf45c8f7c48e3319 +size 114689508 diff --git a/decoder.layers.46.fc1.bias b/decoder.layers.46.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..45924f9e73f010620481b58e3d1395a33b281dc3 Binary files /dev/null and b/decoder.layers.46.fc1.bias differ diff --git a/decoder.layers.46.fc1.weight b/decoder.layers.46.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..6683f9dcf9f0872c52bb864de5b3118fcf585630 --- /dev/null +++ b/decoder.layers.46.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b3020a1a911188e1ec502dff5f0b66b64ebe9259218983228ca6c559f558794 +size 411041792 diff --git a/decoder.layers.46.fc2.bias b/decoder.layers.46.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..f916b44071d72fba9a124838b8158dac34eaa41c Binary files /dev/null and b/decoder.layers.46.fc2.bias differ diff --git a/decoder.layers.46.fc2.weight b/decoder.layers.46.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..ed919910524d9587a661251f875de6e45a54cd51 --- /dev/null +++ b/decoder.layers.46.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19892be68665c3a44acfe4eecffdf70c507174bd7ce3dc64dd6b73616e7553ac +size 411041792 diff --git a/decoder.layers.46.final_layer_norm.bias b/decoder.layers.46.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..a1384a8fd52f7608c712f9e057b38cefd5534220 Binary files /dev/null and b/decoder.layers.46.final_layer_norm.bias differ diff --git a/decoder.layers.46.final_layer_norm.weight b/decoder.layers.46.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.46.final_layer_norm.weight differ diff --git a/decoder.layers.46.self_attn.catted_head_biases b/decoder.layers.46.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..e48a6e848f2694d85e030937b1f9513cf329233e Binary files /dev/null and b/decoder.layers.46.self_attn.catted_head_biases differ diff --git a/decoder.layers.46.self_attn.catted_head_weights b/decoder.layers.46.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..b95fe25179200ba5d2ed39a00afd09c88f9c822b --- /dev/null +++ b/decoder.layers.46.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8415f4f96079773ea20d443bfde65c899b5d26dc9a2147d7b83c5dbeaeaa46f4 +size 411041792 diff --git a/decoder.layers.46.self_attn.out_proj.bias b/decoder.layers.46.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..44201dbcc8d5c047e85b24bccaf929b0ce2b8071 Binary files /dev/null and b/decoder.layers.46.self_attn.out_proj.bias differ diff --git a/decoder.layers.46.self_attn_layer_norm.bias b/decoder.layers.46.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..16defb31048d48937d0a4bc43665d9799f2783cb Binary files /dev/null and b/decoder.layers.46.self_attn_layer_norm.bias differ diff --git a/decoder.layers.46.self_attn_layer_norm.weight b/decoder.layers.46.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.46.self_attn_layer_norm.weight differ diff --git a/decoder.layers.47.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.47.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..1cc92738dfde2ba0201db4d23e170fa7d2d92bf3 --- /dev/null +++ b/decoder.layers.47.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ba72f51cba8cf702c1d3555a041320ab5b386548d185a40571d1acafdead6d0 +size 28673508 diff --git a/decoder.layers.47.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.47.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..a95e8ecabb74e711130e70fcbff4527fdc122c15 --- /dev/null +++ b/decoder.layers.47.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88db0ee6bf203b94faa6ebe906d323b9a1f0f7583209ec288f96cc4c88492e69 +size 114689508 diff --git a/decoder.layers.47.fc1.bias b/decoder.layers.47.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..72eaded8399e6f83593bb45dd8ae4cf1b4980eb6 Binary files /dev/null and b/decoder.layers.47.fc1.bias differ diff --git a/decoder.layers.47.fc1.weight b/decoder.layers.47.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..1d89f57c6e963e0acb96a1e025cc61e332e5f3ac --- /dev/null +++ b/decoder.layers.47.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fed044b532cbcbdb6bed68e268961bb523a25e86e9a6b2a1207e54b667c9752 +size 411041792 diff --git a/decoder.layers.47.fc2.bias b/decoder.layers.47.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..becd8afa2d654446791f598da164f996caa309f3 Binary files /dev/null and b/decoder.layers.47.fc2.bias differ diff --git a/decoder.layers.47.fc2.weight b/decoder.layers.47.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..051c34b6be4431366f6ecdde18061fdd870aa472 --- /dev/null +++ b/decoder.layers.47.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e9701902aa995ecb5aedbfd985ab37a983fb84b5d07941173d31ed5acf602b7 +size 411041792 diff --git a/decoder.layers.47.final_layer_norm.bias b/decoder.layers.47.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..ad32b479964d7b6601c2535765435f9aefa8c66e Binary files /dev/null and b/decoder.layers.47.final_layer_norm.bias differ diff --git a/decoder.layers.47.final_layer_norm.weight b/decoder.layers.47.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.47.final_layer_norm.weight differ diff --git a/decoder.layers.47.self_attn.catted_head_biases b/decoder.layers.47.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..20916760e02bed9e5363db28c2c62591186d93e0 Binary files /dev/null and b/decoder.layers.47.self_attn.catted_head_biases differ diff --git a/decoder.layers.47.self_attn.catted_head_weights b/decoder.layers.47.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..d392d60e2a018ff560bf19187db12ef447da4d58 --- /dev/null +++ b/decoder.layers.47.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f57a59c218714cb1d3cb3595859846465b768798155b38187aa2711ceeecedb +size 411041792 diff --git a/decoder.layers.47.self_attn.out_proj.bias b/decoder.layers.47.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..d83cb94f371c76e216d4f07569b70a064f5f09d6 Binary files /dev/null and b/decoder.layers.47.self_attn.out_proj.bias differ diff --git a/decoder.layers.47.self_attn_layer_norm.bias b/decoder.layers.47.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..81dde9f9001d032fdec40f50011c598add638a90 Binary files /dev/null and b/decoder.layers.47.self_attn_layer_norm.bias differ diff --git a/decoder.layers.47.self_attn_layer_norm.weight b/decoder.layers.47.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.47.self_attn_layer_norm.weight differ diff --git a/decoder.layers.5.attn-head-sparsity-predictor.1.weight b/decoder.layers.5.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..a22ece3276f3b241acc7cf70d4f58c9bf54f65f2 --- /dev/null +++ b/decoder.layers.5.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d2bf84f307c24113fc8bea9b99813dff735320c86a98d79bd3ae425e5baf094 +size 28673508 diff --git a/decoder.layers.5.attn-head-sparsity-predictor.2.weight b/decoder.layers.5.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..3d1442e88dc13093c2cf09f22dee90b82894229b Binary files /dev/null and b/decoder.layers.5.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.5.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.5.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..5cd44cfd890d2c4e9a1de18d9b376e9241f0b58d --- /dev/null +++ b/decoder.layers.5.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f287e0a715085b852314bd4d596e60042a975be8829ddd85af921337c9a55e45 +size 28673503 diff --git a/decoder.layers.5.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.5.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9a9ca36d7b5c70c5ae4b1f10d023bee6bb075e4b --- /dev/null +++ b/decoder.layers.5.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2edc5d5445362e90a9ed10208a25e8331618c0cf863950107822cc42fc30ab34 +size 114689503 diff --git a/decoder.layers.5.fc1.bias b/decoder.layers.5.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..b4c862fdc2731d3f8e51555812fb0f226b802561 Binary files /dev/null and b/decoder.layers.5.fc1.bias differ diff --git a/decoder.layers.5.fc1.weight b/decoder.layers.5.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..11168a0ec5d22709d503636770206af73ad658ea --- /dev/null +++ b/decoder.layers.5.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8d47c7d4ca438b052eb5b97c6071b6b21a2f6563ffdbf31f6909fca94d395d +size 411041792 diff --git a/decoder.layers.5.fc2.bias b/decoder.layers.5.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..0f129851da24a705b1421f0137c7ddf595325aa9 Binary files /dev/null and b/decoder.layers.5.fc2.bias differ diff --git a/decoder.layers.5.fc2.weight b/decoder.layers.5.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..9fed4d7bbb003030042a4aee324e078a5854c8ec --- /dev/null +++ b/decoder.layers.5.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7047b99925a58a475294754ad1870abe00474fb84e020a4cc93b36b776b7f4f1 +size 411041792 diff --git a/decoder.layers.5.final_layer_norm.bias b/decoder.layers.5.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..f389da9b46fa711ff982417d4b84b3ec92aedd25 Binary files /dev/null and b/decoder.layers.5.final_layer_norm.bias differ diff --git a/decoder.layers.5.final_layer_norm.weight b/decoder.layers.5.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.5.final_layer_norm.weight differ diff --git a/decoder.layers.5.self_attn.catted_head_biases b/decoder.layers.5.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..f305297928a53312d3b84f5b5343dd8fe64814d8 Binary files /dev/null and b/decoder.layers.5.self_attn.catted_head_biases differ diff --git a/decoder.layers.5.self_attn.catted_head_weights b/decoder.layers.5.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..bba9f99285b121744009dd4ee6db534e392211a7 --- /dev/null +++ b/decoder.layers.5.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be316bfa3127c1a045a94547fa60b97f8ccbf0ac524153b623bd5016b42942c4 +size 411041792 diff --git a/decoder.layers.5.self_attn.out_proj.bias b/decoder.layers.5.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..93f371c32058a9bf8c76e3b57de8f0342bafb854 Binary files /dev/null and b/decoder.layers.5.self_attn.out_proj.bias differ diff --git a/decoder.layers.5.self_attn_layer_norm.bias b/decoder.layers.5.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..946cf1a357c1100933ac3ee8b509316ef3a72c00 Binary files /dev/null and b/decoder.layers.5.self_attn_layer_norm.bias differ diff --git a/decoder.layers.5.self_attn_layer_norm.weight b/decoder.layers.5.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.5.self_attn_layer_norm.weight differ diff --git a/decoder.layers.6.attn-head-sparsity-predictor.1.weight b/decoder.layers.6.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..17dc2b56eb72e73a8f5d96c97655ec463396a61d --- /dev/null +++ b/decoder.layers.6.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06db86b7ceadcaede632a7a29f5d15209e1a372f669b787daa2f9361a6f12413 +size 28673508 diff --git a/decoder.layers.6.attn-head-sparsity-predictor.2.weight b/decoder.layers.6.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..912c2555668b578eae545b72616035e8b0fe079c Binary files /dev/null and b/decoder.layers.6.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.6.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.6.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..40aa3fb49b17e7581442fe071d42501a824c3119 --- /dev/null +++ b/decoder.layers.6.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19b3e5d5f155d43abdcb914c4297c9158c69bb11594fe3fc5bd0a1290f961a4 +size 28673503 diff --git a/decoder.layers.6.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.6.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..fddb7f6ce5baa3eda42c4d0878018cf8c5b25c79 --- /dev/null +++ b/decoder.layers.6.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be7cd285234e04c6c522cf6a5978d71fb94f1c9c978b5ce12e67da9170fbdd5 +size 114689503 diff --git a/decoder.layers.6.fc1.bias b/decoder.layers.6.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..3ecf7fa681e53cc2e99b5c0ae254417500649a6a Binary files /dev/null and b/decoder.layers.6.fc1.bias differ diff --git a/decoder.layers.6.fc1.weight b/decoder.layers.6.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b3fe05f5aae493cd6e1042d7a29bd5e2d8e17b76 --- /dev/null +++ b/decoder.layers.6.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18da2fbad501030dc02201fadba0a1db69bf98b9eaff6cdb545b6be281d79977 +size 411041792 diff --git a/decoder.layers.6.fc2.bias b/decoder.layers.6.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..817837641b20d094a1dd583dc9fd8458ee7ca5f0 Binary files /dev/null and b/decoder.layers.6.fc2.bias differ diff --git a/decoder.layers.6.fc2.weight b/decoder.layers.6.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..653c34ffecd2d1d8cd1987304a493af9536782fd --- /dev/null +++ b/decoder.layers.6.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34028dee816a706e4bf5002f375b5d4d40a5457524d470813630df00fe83ceb8 +size 411041792 diff --git a/decoder.layers.6.final_layer_norm.bias b/decoder.layers.6.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..89bef2a7a34e9bc42cc1c52871915032dda0a619 Binary files /dev/null and b/decoder.layers.6.final_layer_norm.bias differ diff --git a/decoder.layers.6.final_layer_norm.weight b/decoder.layers.6.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.6.final_layer_norm.weight differ diff --git a/decoder.layers.6.self_attn.catted_head_biases b/decoder.layers.6.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..eb6a805595ab8c6532633507cd54fc233f15f7a3 Binary files /dev/null and b/decoder.layers.6.self_attn.catted_head_biases differ diff --git a/decoder.layers.6.self_attn.catted_head_weights b/decoder.layers.6.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..39004bbdac5df29f9b9d625e2b29e823046c141e --- /dev/null +++ b/decoder.layers.6.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03313fe35eb1426226bcc4151b0eac1d79c8063bd9c0f0628a8f601bcceeecec +size 411041792 diff --git a/decoder.layers.6.self_attn.out_proj.bias b/decoder.layers.6.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..87b2953e548123787dae3fe735b0b793e296f4ee Binary files /dev/null and b/decoder.layers.6.self_attn.out_proj.bias differ diff --git a/decoder.layers.6.self_attn_layer_norm.bias b/decoder.layers.6.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..10025f08e4d214e74b4791a432a6607c3faa5728 Binary files /dev/null and b/decoder.layers.6.self_attn_layer_norm.bias differ diff --git a/decoder.layers.6.self_attn_layer_norm.weight b/decoder.layers.6.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.6.self_attn_layer_norm.weight differ diff --git a/decoder.layers.7.attn-head-sparsity-predictor.1.weight b/decoder.layers.7.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..5a5bed5177975b6c8d8f46a80cb1cae6dcb1e2e1 --- /dev/null +++ b/decoder.layers.7.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a4e0e36cee6a171d0a334233c965de2108d107d5aaa0f90ed3f96875d87ed4 +size 28673508 diff --git a/decoder.layers.7.attn-head-sparsity-predictor.2.weight b/decoder.layers.7.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..3b105e33ef37327e4235d2b7d9c61f97f5f792f0 Binary files /dev/null and b/decoder.layers.7.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.7.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.7.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..c7df32d063512ca00708d3ba9c7dc89a07790841 --- /dev/null +++ b/decoder.layers.7.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc24d7461ecdbe8eff5d30764318ff13cf52da6dd39062eec8c1888e2e2c9ac6 +size 28673503 diff --git a/decoder.layers.7.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.7.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..ec7f907523f20ee2002d178964c5c16ed0caf51b --- /dev/null +++ b/decoder.layers.7.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eabcf72d12e7366da29ce60b9c040eaf8f65dd5d3bfdc5098562817067f74b0 +size 114689503 diff --git a/decoder.layers.7.fc1.bias b/decoder.layers.7.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..3a6832bba09ae7a67e1bed9d8cffffd9f78f2010 Binary files /dev/null and b/decoder.layers.7.fc1.bias differ diff --git a/decoder.layers.7.fc1.weight b/decoder.layers.7.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..3317ef472ecd75e04a1a5d3ef43a67f17376de59 --- /dev/null +++ b/decoder.layers.7.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d39091fec18509d626acae698b47a9f252ec74bbf6b61c4664d00679841b4f +size 411041792 diff --git a/decoder.layers.7.fc2.bias b/decoder.layers.7.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..4224a107a5f1c8fb50f27d6ebb629fd868faa674 Binary files /dev/null and b/decoder.layers.7.fc2.bias differ diff --git a/decoder.layers.7.fc2.weight b/decoder.layers.7.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..25a587f46c5d71b079ee9b0d7397b0c6cc7df8dc --- /dev/null +++ b/decoder.layers.7.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8a5e588fedb2c4aaf78dbfaf597817423c2bf8d01975785d9b84d83aba0399 +size 411041792 diff --git a/decoder.layers.7.final_layer_norm.bias b/decoder.layers.7.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..48e84059c2637b6dc9712f43534a54dcdd9ca292 Binary files /dev/null and b/decoder.layers.7.final_layer_norm.bias differ diff --git a/decoder.layers.7.final_layer_norm.weight b/decoder.layers.7.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.7.final_layer_norm.weight differ diff --git a/decoder.layers.7.self_attn.catted_head_biases b/decoder.layers.7.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..d5d7f2b35031e971d647b4a5881b88901034da88 Binary files /dev/null and b/decoder.layers.7.self_attn.catted_head_biases differ diff --git a/decoder.layers.7.self_attn.catted_head_weights b/decoder.layers.7.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..a2c96a3f552eb0300fdfea0c31e513347ed08c64 --- /dev/null +++ b/decoder.layers.7.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1996abb68540bba9f767266e121a1c7f93fd2144475ed7abb7bdd8aca60fea2 +size 411041792 diff --git a/decoder.layers.7.self_attn.out_proj.bias b/decoder.layers.7.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..7eb57bb3e7ae3727dc97475c5bcd21e84d1c099e Binary files /dev/null and b/decoder.layers.7.self_attn.out_proj.bias differ diff --git a/decoder.layers.7.self_attn_layer_norm.bias b/decoder.layers.7.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..49cf1a27fcaefbd36b7fb4437dd65e26820704cd Binary files /dev/null and b/decoder.layers.7.self_attn_layer_norm.bias differ diff --git a/decoder.layers.7.self_attn_layer_norm.weight b/decoder.layers.7.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.7.self_attn_layer_norm.weight differ diff --git a/decoder.layers.8.attn-head-sparsity-predictor.1.weight b/decoder.layers.8.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..f47fa9ffc9bff36d5b708c475601259a2afae569 --- /dev/null +++ b/decoder.layers.8.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a84b50a451aaf714fd38821571fd0a38eae82d07cc39fa375e32a0c77c95596 +size 28673508 diff --git a/decoder.layers.8.attn-head-sparsity-predictor.2.weight b/decoder.layers.8.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..5fffea666bf5e1efe5fe4761cc6b2796d1147152 Binary files /dev/null and b/decoder.layers.8.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.8.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.8.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..7d65db3407b2e46e7e5dc43c2a475dd3a699dc0d --- /dev/null +++ b/decoder.layers.8.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a940ad55022beb20fec1e1509e8190a9921ac14644c1dd8b5de46c729c210547 +size 28673503 diff --git a/decoder.layers.8.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.8.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..754584d19f295e77be5ed48cd01fe6f3d820d294 --- /dev/null +++ b/decoder.layers.8.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4451f030383df4632045c5aad02e1d57993b7dddd13b2ff0f7bce6efb852aeb9 +size 114689503 diff --git a/decoder.layers.8.fc1.bias b/decoder.layers.8.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..0cbb00f9770374198647af1290802d7cd2b0514a Binary files /dev/null and b/decoder.layers.8.fc1.bias differ diff --git a/decoder.layers.8.fc1.weight b/decoder.layers.8.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..7af778cc497badfaaf60cbc9b0696de99223b2e6 --- /dev/null +++ b/decoder.layers.8.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38beee6147b41f187ef82446b5ce41c2701ecbc371669f10dea017cb265ca6f +size 411041792 diff --git a/decoder.layers.8.fc2.bias b/decoder.layers.8.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..e11f52293327da88bc512cca64628d536a55c34f Binary files /dev/null and b/decoder.layers.8.fc2.bias differ diff --git a/decoder.layers.8.fc2.weight b/decoder.layers.8.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7b4dd5e8b367923d0efb267744afb392fc4d66a8 --- /dev/null +++ b/decoder.layers.8.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:405bfd00817483d36ad959b06e26506f7358ca3828dd38593c3785f18d404931 +size 411041792 diff --git a/decoder.layers.8.final_layer_norm.bias b/decoder.layers.8.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..ad38160954a279327c8cb334df5205534ef9bcf4 Binary files /dev/null and b/decoder.layers.8.final_layer_norm.bias differ diff --git a/decoder.layers.8.final_layer_norm.weight b/decoder.layers.8.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.8.final_layer_norm.weight differ diff --git a/decoder.layers.8.self_attn.catted_head_biases b/decoder.layers.8.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..376615c76576f7dc2c12c31a2b1226e970bad3ab Binary files /dev/null and b/decoder.layers.8.self_attn.catted_head_biases differ diff --git a/decoder.layers.8.self_attn.catted_head_weights b/decoder.layers.8.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..5bb896704fb62c463e5190b074cf89d497f7ad5e --- /dev/null +++ b/decoder.layers.8.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca3a072236525c4e5a18f172d7a835d9ee263cc9f5ff64a427e4cd3e2814a4e +size 411041792 diff --git a/decoder.layers.8.self_attn.out_proj.bias b/decoder.layers.8.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..eda329abeba886c6e208b78f81a94a626d12ee01 Binary files /dev/null and b/decoder.layers.8.self_attn.out_proj.bias differ diff --git a/decoder.layers.8.self_attn_layer_norm.bias b/decoder.layers.8.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..807fa5ebd29d0ce53e3bca5ac543dc047bc8838f Binary files /dev/null and b/decoder.layers.8.self_attn_layer_norm.bias differ diff --git a/decoder.layers.8.self_attn_layer_norm.weight b/decoder.layers.8.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.8.self_attn_layer_norm.weight differ diff --git a/decoder.layers.9.attn-head-sparsity-predictor.1.weight b/decoder.layers.9.attn-head-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..a2b7c81f2ad559e66aa42ce231e92e80f13e3fd7 --- /dev/null +++ b/decoder.layers.9.attn-head-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6541e5e4bf889d062a092dc7c7d7e85dc7b43336562cd4c4dc5f957910fee6e +size 28673508 diff --git a/decoder.layers.9.attn-head-sparsity-predictor.2.weight b/decoder.layers.9.attn-head-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..365d2b8bc929c0973305a6dc9074fae4e1dea412 Binary files /dev/null and b/decoder.layers.9.attn-head-sparsity-predictor.2.weight differ diff --git a/decoder.layers.9.attn.mlp-sparsity-predictor.1.weight b/decoder.layers.9.attn.mlp-sparsity-predictor.1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b1236212b060293df6f556502b6bfd845b49e63d --- /dev/null +++ b/decoder.layers.9.attn.mlp-sparsity-predictor.1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:486ecb07d7ba2a0b0ef65624594e3c9396a45f2c787ee821c9421599960677c4 +size 28673503 diff --git a/decoder.layers.9.attn.mlp-sparsity-predictor.2.weight b/decoder.layers.9.attn.mlp-sparsity-predictor.2.weight new file mode 100644 index 0000000000000000000000000000000000000000..4ecee3a742c34022cfdddef12c74d37a87c1697c --- /dev/null +++ b/decoder.layers.9.attn.mlp-sparsity-predictor.2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b110942ff96aabefa3911753d53f1139847b7da9210f5261a40d319c27470669 +size 114689503 diff --git a/decoder.layers.9.fc1.bias b/decoder.layers.9.fc1.bias new file mode 100644 index 0000000000000000000000000000000000000000..589214bccf83391932175d7b7fc8d52a7a5d05c7 Binary files /dev/null and b/decoder.layers.9.fc1.bias differ diff --git a/decoder.layers.9.fc1.weight b/decoder.layers.9.fc1.weight new file mode 100644 index 0000000000000000000000000000000000000000..b365f49a16e508d234968a153393c6bb8cdae21e --- /dev/null +++ b/decoder.layers.9.fc1.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d55aa15f8d4ebc1090241e422a410f543242b0d0a186e80740f2d9d67b154c5 +size 411041792 diff --git a/decoder.layers.9.fc2.bias b/decoder.layers.9.fc2.bias new file mode 100644 index 0000000000000000000000000000000000000000..3ca3b3ed7def07dee27a2fce1c17327166a88035 Binary files /dev/null and b/decoder.layers.9.fc2.bias differ diff --git a/decoder.layers.9.fc2.weight b/decoder.layers.9.fc2.weight new file mode 100644 index 0000000000000000000000000000000000000000..7ad90cf56e7c6a210ef5e1179bfa98d956f8f3b5 --- /dev/null +++ b/decoder.layers.9.fc2.weight @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e73668747c0b741fee5750426a7408d45e3558e4d807351c479f74ada956225 +size 411041792 diff --git a/decoder.layers.9.final_layer_norm.bias b/decoder.layers.9.final_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..cbeb0ea22f37a52e342b27c78a6aef211a3c211b Binary files /dev/null and b/decoder.layers.9.final_layer_norm.bias differ diff --git a/decoder.layers.9.final_layer_norm.weight b/decoder.layers.9.final_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.9.final_layer_norm.weight differ diff --git a/decoder.layers.9.self_attn.catted_head_biases b/decoder.layers.9.self_attn.catted_head_biases new file mode 100644 index 0000000000000000000000000000000000000000..4de78276cb780e30e48b8b8f31eae0f2848d6b00 Binary files /dev/null and b/decoder.layers.9.self_attn.catted_head_biases differ diff --git a/decoder.layers.9.self_attn.catted_head_weights b/decoder.layers.9.self_attn.catted_head_weights new file mode 100644 index 0000000000000000000000000000000000000000..79c01690f9befec1126524197256e46ff2926c4d --- /dev/null +++ b/decoder.layers.9.self_attn.catted_head_weights @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18847b360926ce010634b2b0b3e304fa16b4c3733b49a8086b3cf1ba6ef0bcc9 +size 411041792 diff --git a/decoder.layers.9.self_attn.out_proj.bias b/decoder.layers.9.self_attn.out_proj.bias new file mode 100644 index 0000000000000000000000000000000000000000..5cff3999f9a5f8cad1ca8450e707ebac2ba3e420 Binary files /dev/null and b/decoder.layers.9.self_attn.out_proj.bias differ diff --git a/decoder.layers.9.self_attn_layer_norm.bias b/decoder.layers.9.self_attn_layer_norm.bias new file mode 100644 index 0000000000000000000000000000000000000000..d89e4951afbd16941bb9c4171f2ee967deb2f2ab Binary files /dev/null and b/decoder.layers.9.self_attn_layer_norm.bias differ diff --git a/decoder.layers.9.self_attn_layer_norm.weight b/decoder.layers.9.self_attn_layer_norm.weight new file mode 100644 index 0000000000000000000000000000000000000000..4bc09aebd8c917bd3da02f8b6916cc14b08d5b12 Binary files /dev/null and b/decoder.layers.9.self_attn_layer_norm.weight differ